library(tidyverse)
library(GenomicRanges)
library(edgeR)
library(scales)
library(extraChIPs)
library(here)
library(tadar)
library(kableExtra)
library(magrittr)
library(pander)
library(viridis)
library(reactable)
library(ggpubr)
theme_set(theme_bw())
lb_reactable <- function(
tbl, highlight = TRUE, striped = TRUE, compact = TRUE,
wrap = FALSE, resizable = TRUE, searchable = TRUE,
style = list(fontFamily = "Calibri, sans-serif"), ...
){
reactable(
tbl,
highlight = highlight, striped = striped, compact = compact,
wrap = wrap, resizable = TRUE, searchable = TRUE,
style = style, ...
)
}
tooltip <- function(value, tooltip) {
tags$abbr(
style = "text-decoration: underline; text-decoration-style: solid; cursor: help",
title = tooltip,
value
)
}
react_format <- function(format, digits = 2){
function(val, ind, col_name){
formatC(val, digits = digits, format = format)
}
}
\[B(\alpha,\beta)=\int_{0}^{1}t^{\alpha-1}(1-t)^{\beta-1}dt\]
rbeta(10e4, shape1 = 1, shape2 = 1) %>%
as_tibble() %>%
ggplot(aes(value)) +
geom_histogram(
colour = "black", fill = "grey50", bins = 50, boundary = 0
) +
labs(x = "p")
tibble(
`alpha = 0.5` = rbeta(10e4, shape1 = 0.5, shape2 = 1),
`alpha = 0.8` = rbeta(10e4, shape1 = 0.8, shape2 = 1),
`beta = 0.5` = rbeta(10e4, shape1 = 1, shape2 = 0.5),
`beta = 0.8` = rbeta(10e4, shape1 = 1, shape2 = 0.8)
) %>%
pivot_longer(cols = everything()) %>%
ggplot(aes(value)) +
geom_histogram(
colour = "black", fill = "grey50", bins = 50, boundary = 0
) +
labs(x = "p") +
facet_wrap(~name, nrow = 2)
dar_A <- read_rds(here("data/dar/dar_fixed_A.Rds")) %>%
sortSeqlevels() %>%
endoapply(sort)
gene_dar_A <- read_rds("data/dar/gene_dar_fixed_A.Rds") %>%
sortSeqlevels() %>%
endoapply(sort)
dge_A <- read_rds("data/de/dgeList_psen1_naglu.Rds")
dge_A$samples <- dge_A$samples %>%
mutate(
group = case_when(
genotype == "EOfAD-like" ~ "eofad",
genotype == "MPS-IIIB" ~ "mps",
genotype == "WT" ~ "wt",
),
group = factor(group, levels = c("wt", "eofad", "mps"))
)
dge_A <- DGEList(
counts = dge_A$counts,
samples = dge_A$samples,
genes = dge_A$genes,
)
dge_A <- dge_A %>%
estimateDisp(design = model.matrix(~0 + group, .$samples))
colnames(dge_A$design) <- str_remove_all(colnames(dge_A$design), "^group")
contrasts_A <- makeContrasts(
eofad = eofad - wt,
mps = mps - wt,
levels = unique(dge_A$samples$group)
)
## Rerun the analysis using glmQL-Fits
res_A <- lapply(colnames(contrasts_A), function(x){
dge_A %>%
glmQLFit() %>%
glmQLFTest(contrast = contrasts_A[,x]) %>%
# glmQLFTest(coef = "groupmps") %>%
topTags(n = Inf) %>%
.[["table"]] %>%
makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
sortSeqlevels() %>%
sort()
}) %>%
set_names(colnames(contrasts_A))
lapply(names(res_A), function(x){
res_A[[x]] %>%
mcols() %>%
as_tibble() %>%
dplyr::select(
starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_A[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
ggplot(aes(dar, -log10(PValue))) +
geom_point() +
labs(title = x)
})
lapply(names(res_A), function(x){
res_A[[x]] %>%
mcols() %>%
as_tibble() %>%
dplyr::select(
starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_A[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar_q = cut(
dar,
breaks = quantile(dar, probs = seq(0, 1, length.out = 21), na.rm = TRUE),
# breaks = seq(0, 1, length.out = 7),
include.lowest = TRUE
) %>%
fct_na_value_to_level(levels(.)[[1]])
) %>%
ggplot(aes(PValue, after_stat(density))) +
geom_histogram(fill = "grey70", colour = "black", binwidth = 0.01) +
# geom_line(
# ## Add 1 - dar^2 in red
# aes(y = y, colour = f),
# data = . %>%
# pull(dar_q) %>%
# levels() %>%
# sapply(
# \(x) {
# min_dar = str_replace_all(x, "^[^0-9]([0-9\\.]+),.+", "\\1") %>% as.numeric()
# tibble(
# PValue = seq(0.01, 1, by = 0.01),
# `1 - DAR` = dbeta(PValue, 1 - min_dar, 1),
# `1 - DAR^2` = dbeta(PValue, 1 - min_dar^2, 1),
# `sqrt(1 - DAR)` = dbeta(PValue, sqrt(1 - min_dar), 1),
# )
# },
# simplify = FALSE
# ) %>%
# bind_rows(.id = "dar_q") %>%
# pivot_longer(
# cols = contains("- DAR"), names_to = "f", values_to = "y"
# ) %>%
# mutate(dar_q = fct_inorder(dar_q)),
# ) +
geom_label(
aes(x = 1, y = 0, label = label),
data = . %>%
summarise(n = dplyr::n(), .by = dar_q) %>%
mutate(label = paste("n =", comma(n))),
hjust = 1, vjust = 0, alpha = 0.7
) +
facet_wrap(~dar_q, scales = "free_y") +
labs(title = x)
})
beta_fits_A <- sapply(names(res_A), simplify = FALSE, function(x){
res_A[[x]] %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_A[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
dplyr::filter(!is.na(dar)) %>%
# mutate(dar_q = cut(dar, seq(0, 1, length.out = 5))) %>%
mutate(
dar_q = cut(
dar,
unique(quantile(dar, probs = seq(0, 1, length.out = 26))),
include.lowest = TRUE
)
) %>%
split(.$dar_q) %>%
lapply(
\(x) {
tryCatch({ # !!
fit <- MASS::fitdistr(x$PValue, "beta", list(shape1 = 1, shape2 = 1))
tibble(
dar = median(x$dar),
shape1 = fit$estimate["shape1"],
se1 = fit$sd["shape1"],
shape2 = fit$estimate["shape2"],
se2 = fit$sd["shape2"]
)
}, error = \(e){
tibble(
dar = median(x$dar), shape1 = NA, se1 = NA, shape2 = NA, se2 = NA
)
})
}
) %>%
bind_rows() %>%
mutate(shape1_norm = shape1 / max(shape1, na.rm = TRUE))
})
sapply(beta_fits_A, simplify = FALSE, function(x){
as.data.frame(x)
})
## $eofad
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 1.3642048 0.04688397 0.9997116 0.03245841 0.8854227
## 2 0.01047562 1.4269500 0.06963735 1.0240268 0.04709860 0.9261467
## 3 0.01785714 1.3744139 0.06697831 0.9872775 0.04528154 0.8920488
## 4 0.02698888 1.3864142 0.06459644 0.9988075 0.04384092 0.8998374
## 5 0.03569551 1.3813918 0.07025759 1.0669902 0.05183133 0.8965777
## 6 0.04300952 1.4297747 0.06944548 1.0746444 0.04964836 0.9279801
## 7 0.04990783 1.3573061 0.06584545 1.0210368 0.04703406 0.8809451
## 8 0.05582194 1.5407385 0.07571572 1.0321870 0.04731769 1.0000000
## 9 0.06250000 1.4773843 0.06494681 1.0856678 0.04525427 0.9588806
## 10 0.06500510 1.4080831 0.07833010 0.9796134 0.05100568 0.9139014
## 11 0.07029937 1.3803428 0.06667374 1.0966264 0.05086649 0.8958969
## 12 0.07638889 1.3718891 0.06624127 1.0842312 0.05021382 0.8904101
## 13 0.08333333 1.2587474 0.06069451 0.9926840 0.04577307 0.8169766
## 14 0.09036797 1.2739664 0.06123775 1.0143436 0.04673370 0.8268544
## 15 0.09777917 1.1628298 0.05594791 0.9436833 0.04357940 0.7547224
## 16 0.10680465 1.2222256 0.05849145 1.0250134 0.04745795 0.7932726
## 17 0.11562995 1.1718144 0.05603150 1.0071480 0.04678010 0.7605537
## 18 0.12500000 1.3053243 0.06300268 1.0287082 0.04752663 0.8472069
## 19 0.13448338 1.1103555 0.05274494 0.9940575 0.04620816 0.7206645
## 20 0.14929218 1.0873698 0.05160685 0.9753787 0.04530417 0.7057459
## 21 0.16928166 1.0017117 0.04699863 0.9928652 0.04650001 0.6501504
## 22 0.18760982 1.0236153 0.04800416 1.0322638 0.04849069 0.6643667
## 23 0.22321429 0.8427165 0.03752271 1.0526388 0.04910974 0.5469562
## 24 0.29940476 0.6808290 0.03141921 0.9393957 0.04668868 0.4418848
##
## $mps
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 0.9737913 0.03222091 0.9790729 0.03243162 1.0000000
## 2 0.01057899 0.9114516 0.04240630 0.9455825 0.04433756 0.9359825
## 3 0.01785714 0.9612079 0.04463266 1.0653909 0.05050219 0.9870779
## 4 0.02709042 0.9320072 0.04228929 0.9796237 0.04491253 0.9570913
## 5 0.03621032 0.8894145 0.04259590 0.8772056 0.04188501 0.9133523
## 6 0.04305923 0.9262845 0.04325805 0.9407186 0.04407552 0.9512146
## 7 0.04996135 0.9577556 0.04470216 0.9893267 0.04648444 0.9835327
## 8 0.05605103 0.9689856 0.04508065 1.0423201 0.04921040 0.9950650
## 9 0.06250000 0.9255260 0.04058723 1.0118108 0.04519436 0.9504357
## 10 0.06547619 0.9118877 0.04519804 0.9486099 0.04741189 0.9364303
## 11 0.07132959 0.9181072 0.04277084 0.9487388 0.04450463 0.9428172
## 12 0.07775160 0.8894110 0.04112852 0.9826578 0.04640470 0.9133487
## 13 0.08488414 0.9475253 0.04410841 1.0060724 0.04741332 0.9730271
## 14 0.09261998 0.9501541 0.04411442 1.0376180 0.04904419 0.9757266
## 15 0.10033682 0.9101634 0.04221283 0.9897001 0.04671128 0.9346596
## 16 0.10927871 0.9666169 0.04497515 1.0109044 0.04746661 0.9926325
## 17 0.11845956 0.8551450 0.03943612 0.9879780 0.04698346 0.8781605
## 18 0.12579597 0.8640545 0.03962850 1.0476678 0.05001984 0.8873098
## 19 0.13745518 0.8729122 0.04019928 1.0154643 0.04827190 0.8964059
## 20 0.15138535 0.8132749 0.03733955 0.9317239 0.04407398 0.8351634
## 21 0.17013889 0.7993177 0.03646375 0.9948921 0.04759025 0.8208306
## 22 0.18839826 0.7952783 0.03618391 1.0146055 0.04865511 0.8166825
## 23 0.22576681 0.7004583 0.03052830 0.9581323 0.04484193 0.7193105
## 24 0.31307169 0.4303412 0.01905719 0.7966763 0.04157581 0.4419234
plotly::ggplotly({
plotDarECDF(dar = dar_A$eofad, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_A$eofad) %>%
lm(shape1 ~ dar, data = beta_fits_A$eofad) %>%
step() %>%
summary()
## Start: AIC=-119.88
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.13759 -119.877
## - dar 1 0.86183 0.99942 -74.287
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_A$eofad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.14616 -0.05096 -0.01967 0.04179 0.18130
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.51037 0.02732 55.29 < 2e-16 ***
## dar -2.70382 0.23033 -11.74 6.06e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07908 on 22 degrees of freedom
## Multiple R-squared: 0.8623, Adjusted R-squared: 0.8561
## F-statistic: 137.8 on 1 and 22 DF, p-value: 6.057e-11
lm_A_eofad <- function(dar){
1.53110 - 2.82662 * dar
}
beta_fits_A$eofad %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_A_eofad(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_A_eofad <- res_A$eofad %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_A$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_A_eofad(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
res_A$eofad %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_A$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_A_eofad(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_A$eofad) %>%
lm(shape1_norm ~ dar, data = beta_fits_A$eofad) %>%
step() %>%
summary()
## Start: AIC=-140.63
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.05796 -140.626
## - dar 1 0.36305 0.42101 -95.036
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_A$eofad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.09486 -0.03307 -0.01277 0.02712 0.11767
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.98029 0.01773 55.29 < 2e-16 ***
## dar -1.75489 0.14949 -11.74 6.06e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05133 on 22 degrees of freedom
## Multiple R-squared: 0.8623, Adjusted R-squared: 0.8561
## F-statistic: 137.8 on 1 and 22 DF, p-value: 6.057e-11
lm_A_eofad_norm <- function(dar){
1.14213 - 2.10852 * dar
}
dar_tmp <- dar_A$eofad
lm_all <- tibble(
dataset = "A_eofad",
intercept = 1.14213,
slope = -2.10852,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
)
beta_fits_A$eofad %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_A_eofad_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_A_eofad_norm <- res_A$eofad %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_A$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_A_eofad_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
res_A$eofad %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_A$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_A_eofad_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_A$mps, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_A$mps) %>%
lm(shape1 ~ dar, data = beta_fits_A$mps) %>%
step() %>%
summary()
## Start: AIC=-137.25
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.066703 -137.25
## - dar 1 0.24449 0.311195 -102.29
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_A$mps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.145768 -0.027659 0.005285 0.034085 0.105399
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.01410 0.01887 53.73 < 2e-16 ***
## dar -1.39901 0.15579 -8.98 8.23e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05506 on 22 degrees of freedom
## Multiple R-squared: 0.7857, Adjusted R-squared: 0.7759
## F-statistic: 80.64 on 1 and 22 DF, p-value: 8.234e-09
lm_A_mps <- function(dar){
1.12769 - 1.80512 * dar
}
beta_fits_A$mps %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_A_mps(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_A_mps <- res_A$mps %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_A$mps) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_A_mps(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
kable(darP_A_mps) %>%
kable_paper() %>%
column_spec(
1:ncol(darP_A_mps),
background = ifelse(darP_A_mps$isDE, "green4", "red3")
)
| range | gene_id | gene_name | gene_biotype | logFC | logCPM | PValue | FDR | dar | alpha | darP | FDR_darP | wasDE | isDE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5:38215924-38248462:- | ENSDARG00000060366 | slc12a9 | protein_coding | 0.4031449 | 4.8904326 | 2.00e-07 | 0.0004054 | 0.0638934 | 1.0000000 | 0.0000002 | 0.0015241 | TRUE | TRUE |
| 7:19909774-19923249:- | ENSDARG00000056233 | zgc:110591 | protein_coding | 0.4244109 | 4.3066556 | 2.00e-07 | 0.0004634 | 0.0608100 | 1.0000000 | 0.0000002 | 0.0015241 | TRUE | TRUE |
| 8:47633438-47665026:+ | ENSDARG00000007275 | si:ch211-251b21.1 | protein_coding | 0.6842232 | 9.2881803 | 0.00e+00 | 0.0000192 | 0.1875000 | 0.7892300 | 0.0000002 | 0.0015241 | TRUE | TRUE |
| 25:28714527-28768489:- | ENSDARG00000061719 | washc4 | protein_coding | 0.2736910 | 4.9829463 | 2.20e-06 | 0.0033698 | 0.0000000 | 1.0000000 | 0.0000022 | 0.0109519 | TRUE | TRUE |
| 24:40763020-40774028:- | ENSDARG00000103837 | CU633479.2 | protein_coding | 0.9207708 | 3.8575249 | 5.00e-07 | 0.0010961 | 0.1453873 | 0.8652485 | 0.0000038 | 0.0153026 | TRUE | TRUE |
| 8:29726620-29734951:+ | ENSDARG00000024540 | tspan36 | protein_coding | 0.5499149 | 5.7319949 | 5.90e-06 | 0.0065288 | 0.0008720 | 1.0000000 | 0.0000059 | 0.0195864 | TRUE | TRUE |
| 24:32650227-32665283:- | ENSDARG00000014488 | ca2 | protein_coding | 0.4202880 | 4.2110484 | 2.90e-06 | 0.0035998 | 0.1354167 | 0.8832467 | 0.0000127 | 0.0364171 | TRUE | TRUE |
| 5:37837245-37839310:+ | ENSDARG00000103498 | epd | protein_coding | 0.4902888 | 7.8277492 | 1.88e-05 | 0.0156429 | 0.0000000 | 1.0000000 | 0.0000188 | 0.0465635 | TRUE | TRUE |
| 25:17589906-17613025:+ | ENSDARG00000014303 | vac14 | protein_coding | 0.3800054 | 3.9872145 | 2.90e-06 | 0.0035998 | 0.1570791 | 0.8441434 | 0.0000212 | 0.0465635 | TRUE | TRUE |
| 11:16040517-16064291:+ | ENSDARG00000077082 | agtrap | protein_coding | 0.6484891 | 2.0941711 | 1.59e-05 | 0.0144971 | 0.0936756 | 0.9585943 | 0.0000251 | 0.0465635 | TRUE | TRUE |
| 8:37030974-37043900:- | ENSDARG00000008931 | renbp | protein_coding | 0.5951639 | 2.5800845 | 1.01e-05 | 0.0101522 | 0.1153063 | 0.9195483 | 0.0000255 | 0.0465635 | TRUE | TRUE |
| 22:22302614-22315323:+ | ENSDARG00000010531 | scamp4 | protein_coding | 0.2562861 | 4.7977629 | 3.90e-05 | 0.0260611 | 0.0326885 | 1.0000000 | 0.0000390 | 0.0651527 | TRUE | FALSE |
| 16:14216581-14231312:+ | ENSDARG00000076058 | gba | protein_coding | 0.2575265 | 4.6510506 | 4.23e-05 | 0.0263202 | 0.0189639 | 1.0000000 | 0.0000423 | 0.0653022 | TRUE | FALSE |
| 25:18889332-18906871:+ | ENSDARG00000015732 | cax2 | protein_coding | 0.5076029 | 3.9992018 | 1.00e-07 | 0.0001867 | 0.2924278 | 0.5998227 | 0.0000489 | 0.0701200 | TRUE | FALSE |
| 19:798589-808265:- | ENSDARG00000059354 | glmp | protein_coding | 0.2334236 | 4.9059589 | 5.50e-05 | 0.0315333 | 0.0045249 | 1.0000000 | 0.0000550 | 0.0735777 | TRUE | FALSE |
| 13:18073211-18122333:- | ENSDARG00000062532 | washc2c | protein_coding | 0.1868788 | 6.4142829 | 8.48e-05 | 0.0459874 | 0.0452385 | 1.0000000 | 0.0000848 | 0.1063458 | TRUE | FALSE |
| 24:41466841-41478917:- | ENSDARG00000114762 | CABZ01084131.1 | protein_coding | 0.5351594 | 3.5400987 | 4.11e-05 | 0.0263202 | 0.1373178 | 0.8798149 | 0.0001382 | 0.1260880 | TRUE | FALSE |
| 2:20555394-20599315:- | ENSDARG00000075487 | si:ch211-267e7.3 | protein_coding | -0.5567146 | 1.9374349 | 8.75e-05 | 0.0461998 | 0.1178659 | 0.9149278 | 0.0001937 | 0.1619729 | TRUE | FALSE |
| 4:5848229-5850633:+ | ENSDARG00000099560 | lyrm5a | protein_coding | 0.4614141 | 3.0222516 | 4.33e-05 | 0.0263202 | 0.1579423 | 0.8425852 | 0.0002105 | 0.1689560 | TRUE | FALSE |
| 24:40728978-40731305:- | ENSDARG00000114818 | CU633479.5 | protein_coding | 0.6986417 | 4.4674747 | 1.95e-05 | 0.0156429 | 0.2158306 | 0.7380898 | 0.0003337 | 0.2232535 | TRUE | FALSE |
| 15:26552948-26570948:- | ENSDARG00000079702 | wdr81 | protein_coding | 0.2943020 | 5.9871210 | 2.50e-06 | 0.0035832 | 0.2921738 | 0.6002812 | 0.0004337 | 0.2486555 | TRUE | FALSE |
| 25:18905472-18913336:- | ENSDARG00000101725 | parietopsin | protein_coding | 0.6208850 | 3.6948674 | 1.10e-06 | 0.0020332 | 0.3178795 | 0.5538794 | 0.0005044 | 0.2595579 | TRUE | FALSE |
| 21:13785969-13796940:+ | ENSDARG00000076092 | slc31a2 | protein_coding | 0.4162170 | 3.0021726 | 2.78e-05 | 0.0199521 | 0.2258850 | 0.7199404 | 0.0005253 | 0.2635472 | TRUE | FALSE |
| 24:33552198-33578392:- | ENSDARG00000056909 | LO018309.1 | protein_coding | -1.2242952 | 1.5011888 | 1.68e-05 | 0.0146180 | 0.2916667 | 0.6011967 | 0.0013451 | 0.4498877 | TRUE | FALSE |
| 17:32622931-32631514:+ | ENSDARG00000055120 | ctsba | protein_coding | 0.7909985 | 7.2871241 | 6.26e-05 | 0.0348852 | 0.2630109 | 0.6529237 | 0.0018005 | 0.5161809 | TRUE | FALSE |
| 24:39443874-39511864:+ | ENSDARG00000068468 | dnah3 | protein_coding | 0.8253023 | 1.2324678 | 2.25e-05 | 0.0170458 | 0.4293025 | 0.3527475 | 0.0229206 | 0.8813342 | TRUE | FALSE |
| 24:37631632-37640705:- | ENSDARG00000004274 | zgc:112496 | protein_coding | 0.7992052 | 0.4481920 | 3.03e-05 | 0.0209476 | 0.4699074 | 0.2794507 | 0.0545972 | 0.9459566 | TRUE | FALSE |
| 24:37406535-37429066:+ | ENSDARG00000092499 | si:ch211-183d21.1 | protein_coding | -0.8246218 | 4.9044281 | 0.00e+00 | 0.0000001 | 0.9370246 | 0.1000000 | 0.0717002 | 0.9459566 | TRUE | FALSE |
| 24:36903172-36910224:- | ENSDARG00000036304 | dnaaf3l | protein_coding | 2.4162852 | -0.3139137 | 0.00e+00 | 0.0000002 | 0.6285714 | 0.1000000 | 0.0835136 | 0.9459566 | TRUE | FALSE |
| 24:36798558-36802891:- | ENSDARG00000061638 | dcakd | protein_coding | -0.8559429 | 2.6731254 | 0.00e+00 | 0.0000007 | 0.9479167 | 0.1000000 | 0.0999672 | 0.9459566 | TRUE | FALSE |
| 24:36452924-36524504:+ | ENSDARG00000101812 | zgc:114120 | protein_coding | 0.7117346 | 1.7438925 | 6.90e-06 | 0.0073400 | 0.5208333 | 0.1875233 | 0.1078311 | 0.9459566 | TRUE | FALSE |
| 24:37709191-37721739:+ | ENSDARG00000045257 | decr2 | protein_coding | -0.7645817 | 2.7648802 | 2.29e-05 | 0.0170458 | 0.5179436 | 0.1927396 | 0.1275792 | 0.9459566 | TRUE | FALSE |
| 24:36129216-36136401:- | ENSDARG00000114991 | tmem14cb | protein_coding | -1.5800575 | -0.1447143 | 0.00e+00 | 0.0000819 | 0.6805556 | 0.1000000 | 0.1702015 | 0.9459566 | TRUE | FALSE |
| 24:37484661-37531540:+ | ENSDARG00000079241 | wdr90 | protein_coding | -1.2100636 | 2.2651867 | 0.00e+00 | 0.0000873 | 0.7692294 | 0.1000000 | 0.1744485 | 0.9459566 | TRUE | FALSE |
| 24:38155830-38191962:+ | ENSDARG00000071460 | si:ch211-234p6.5 | protein_coding | -0.5346074 | 3.2638178 | 1.40e-06 | 0.0023490 | 0.5695445 | 0.1000000 | 0.2598704 | 0.9521641 | TRUE | FALSE |
| 24:39105051-39114123:+ | ENSDARG00000075463 | mss51 | protein_coding | -0.7148683 | 5.0518228 | 3.00e-06 | 0.0035998 | 0.8121222 | 0.1000000 | 0.2808165 | 0.9521641 | TRUE | FALSE |
| 24:39638555-39654132:+ | ENSDARG00000055903 | luc7l | protein_coding | 0.2276765 | 6.3022584 | 1.41e-05 | 0.0134942 | 0.8058615 | 0.1000000 | 0.3273303 | 0.9595125 | TRUE | FALSE |
| 24:38098896-38099675:- | ENSDARG00000093671 | BX001030.1 | unprocessed_pseudogene | 0.9300512 | 0.7739344 | 5.47e-05 | 0.0315333 | 0.6125000 | 0.1000000 | 0.3747940 | 0.9595125 | TRUE | FALSE |
res_A$mps %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_A$mps) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_A_mps(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_A$mps) %>%
lm(shape1_norm ~ dar, data = beta_fits_A$mps) %>%
step() %>%
summary()
## Start: AIC=-135.98
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.07034 -135.98
## - dar 1 0.25783 0.32817 -101.02
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_A$mps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.149691 -0.028403 0.005427 0.035002 0.108236
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.04139 0.01938 53.73 < 2e-16 ***
## dar -1.43667 0.15999 -8.98 8.23e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05655 on 22 degrees of freedom
## Multiple R-squared: 0.7857, Adjusted R-squared: 0.7759
## F-statistic: 80.64 on 1 and 22 DF, p-value: 8.234e-09
lm_A_mps_norm <- function(dar){
1.13861 - 1.82260 * dar
}
dar_tmp <- dar_A$mps
lm_all <- lm_all %>%
rbind(tibble(
dataset = "A_mps",
intercept = 1.13861,
slope = -1.82260,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_A$mps %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_A_mps_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_A_mps_norm <- res_A$mps %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_A$mps) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_A_mps_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
kable(darP_A_mps_norm) %>%
kable_paper() %>%
column_spec(
1:ncol(darP_A_mps_norm),
background = ifelse(darP_A_mps_norm$isDE, "green4", "red3")
)
| range | gene_id | gene_name | gene_biotype | logFC | logCPM | PValue | FDR | dar | alpha | darP | FDR_darP | wasDE | isDE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5:38215924-38248462:- | ENSDARG00000060366 | slc12a9 | protein_coding | 0.4031449 | 4.8904326 | 2.00e-07 | 0.0004054 | 0.0638934 | 1.0000000 | 0.0000002 | 0.0013902 | TRUE | TRUE |
| 8:47633438-47665026:+ | ENSDARG00000007275 | si:ch211-251b21.1 | protein_coding | 0.6842232 | 9.2881803 | 0.00e+00 | 0.0000192 | 0.1875000 | 0.7968725 | 0.0000002 | 0.0013902 | TRUE | TRUE |
| 7:19909774-19923249:- | ENSDARG00000056233 | zgc:110591 | protein_coding | 0.4244109 | 4.3066556 | 2.00e-07 | 0.0004634 | 0.0608100 | 1.0000000 | 0.0000002 | 0.0013902 | TRUE | TRUE |
| 25:28714527-28768489:- | ENSDARG00000061719 | washc4 | protein_coding | 0.2736910 | 4.9829463 | 2.20e-06 | 0.0033698 | 0.0000000 | 1.0000000 | 0.0000022 | 0.0109519 | TRUE | TRUE |
| 24:40763020-40774028:- | ENSDARG00000103837 | CU633479.2 | protein_coding | 0.9207708 | 3.8575249 | 5.00e-07 | 0.0010961 | 0.1453873 | 0.8736271 | 0.0000034 | 0.0135611 | TRUE | TRUE |
| 8:29726620-29734951:+ | ENSDARG00000024540 | tspan36 | protein_coding | 0.5499149 | 5.7319949 | 5.90e-06 | 0.0065288 | 0.0008720 | 1.0000000 | 0.0000059 | 0.0195864 | TRUE | TRUE |
| 24:32650227-32665283:- | ENSDARG00000014488 | ca2 | protein_coding | 0.4202880 | 4.2110484 | 2.90e-06 | 0.0035998 | 0.1354167 | 0.8917996 | 0.0000114 | 0.0326508 | TRUE | TRUE |
| 5:37837245-37839310:+ | ENSDARG00000103498 | epd | protein_coding | 0.4902888 | 7.8277492 | 1.88e-05 | 0.0156429 | 0.0000000 | 1.0000000 | 0.0000188 | 0.0420309 | TRUE | TRUE |
| 25:17589906-17613025:+ | ENSDARG00000014303 | vac14 | protein_coding | 0.3800054 | 3.9872145 | 2.90e-06 | 0.0035998 | 0.1570791 | 0.8523177 | 0.0000191 | 0.0420309 | TRUE | TRUE |
| 11:16040517-16064291:+ | ENSDARG00000077082 | agtrap | protein_coding | 0.6484891 | 2.0941711 | 1.59e-05 | 0.0144971 | 0.0936756 | 0.9678769 | 0.0000227 | 0.0420309 | TRUE | TRUE |
| 8:37030974-37043900:- | ENSDARG00000008931 | renbp | protein_coding | 0.5951639 | 2.5800845 | 1.01e-05 | 0.0101522 | 0.1153063 | 0.9284528 | 0.0000230 | 0.0420309 | TRUE | TRUE |
| 22:22302614-22315323:+ | ENSDARG00000010531 | scamp4 | protein_coding | 0.2562861 | 4.7977629 | 3.90e-05 | 0.0260611 | 0.0326885 | 1.0000000 | 0.0000390 | 0.0636944 | TRUE | FALSE |
| 16:14216581-14231312:+ | ENSDARG00000076058 | gba | protein_coding | 0.2575265 | 4.6510506 | 4.23e-05 | 0.0263202 | 0.0189639 | 1.0000000 | 0.0000423 | 0.0636944 | TRUE | FALSE |
| 25:18889332-18906871:+ | ENSDARG00000015732 | cax2 | protein_coding | 0.5076029 | 3.9992018 | 1.00e-07 | 0.0001867 | 0.2924278 | 0.6056311 | 0.0000444 | 0.0636944 | TRUE | FALSE |
| 19:798589-808265:- | ENSDARG00000059354 | glmp | protein_coding | 0.2334236 | 4.9059589 | 5.50e-05 | 0.0315333 | 0.0045249 | 1.0000000 | 0.0000550 | 0.0735777 | TRUE | FALSE |
| 13:18073211-18122333:- | ENSDARG00000062532 | washc2c | protein_coding | 0.1868788 | 6.4142829 | 8.48e-05 | 0.0459874 | 0.0452385 | 1.0000000 | 0.0000848 | 0.1063458 | TRUE | FALSE |
| 24:41466841-41478917:- | ENSDARG00000114762 | CABZ01084131.1 | protein_coding | 0.5351594 | 3.5400987 | 4.11e-05 | 0.0263202 | 0.1373178 | 0.8883346 | 0.0001268 | 0.1167662 | TRUE | FALSE |
| 2:20555394-20599315:- | ENSDARG00000075487 | si:ch211-267e7.3 | protein_coding | -0.5567146 | 1.9374349 | 8.75e-05 | 0.0461998 | 0.1178659 | 0.9237875 | 0.0001783 | 0.1491039 | TRUE | FALSE |
| 4:5848229-5850633:+ | ENSDARG00000099560 | lyrm5a | protein_coding | 0.4614141 | 3.0222516 | 4.33e-05 | 0.0263202 | 0.1579423 | 0.8507444 | 0.0001939 | 0.1556573 | TRUE | FALSE |
| 24:40728978-40731305:- | ENSDARG00000114818 | CU633479.5 | protein_coding | 0.6986417 | 4.4674747 | 1.95e-05 | 0.0156429 | 0.2158306 | 0.7452371 | 0.0003089 | 0.2137254 | TRUE | FALSE |
| 15:26552948-26570948:- | ENSDARG00000079702 | wdr81 | protein_coding | 0.2943020 | 5.9871210 | 2.50e-06 | 0.0035832 | 0.2921738 | 0.6060940 | 0.0004023 | 0.2380284 | TRUE | FALSE |
| 25:18905472-18913336:- | ENSDARG00000101725 | parietopsin | protein_coding | 0.6208850 | 3.6948674 | 1.10e-06 | 0.0020332 | 0.3178795 | 0.5592429 | 0.0004687 | 0.2525877 | TRUE | FALSE |
| 21:13785969-13796940:+ | ENSDARG00000076092 | slc31a2 | protein_coding | 0.4162170 | 3.0021726 | 2.78e-05 | 0.0199521 | 0.2258850 | 0.7269119 | 0.0004883 | 0.2525877 | TRUE | FALSE |
| 24:33552198-33578392:- | ENSDARG00000056909 | LO018309.1 | protein_coding | -1.2242952 | 1.5011888 | 1.68e-05 | 0.0146180 | 0.2916667 | 0.6070183 | 0.0012617 | 0.4360714 | TRUE | FALSE |
| 17:32622931-32631514:+ | ENSDARG00000055120 | ctsba | protein_coding | 0.7909985 | 7.2871241 | 6.26e-05 | 0.0348852 | 0.2630109 | 0.6592463 | 0.0016936 | 0.4998201 | TRUE | FALSE |
| 24:39443874-39511864:+ | ENSDARG00000068468 | dnah3 | protein_coding | 0.8253023 | 1.2324678 | 2.25e-05 | 0.0170458 | 0.4293025 | 0.3561633 | 0.0220977 | 0.8629069 | TRUE | FALSE |
| 24:37631632-37640705:- | ENSDARG00000004274 | zgc:112496 | protein_coding | 0.7992052 | 0.4481920 | 3.03e-05 | 0.0209476 | 0.4699074 | 0.2821568 | 0.0530813 | 0.9391996 | TRUE | FALSE |
| 24:37406535-37429066:+ | ENSDARG00000092499 | si:ch211-183d21.1 | protein_coding | -0.8246218 | 4.9044281 | 0.00e+00 | 0.0000001 | 0.9370246 | 0.1000000 | 0.0717002 | 0.9391996 | TRUE | FALSE |
| 24:36903172-36910224:- | ENSDARG00000036304 | dnaaf3l | protein_coding | 2.4162852 | -0.3139137 | 0.00e+00 | 0.0000002 | 0.6285714 | 0.1000000 | 0.0835136 | 0.9391996 | TRUE | FALSE |
| 24:36798558-36802891:- | ENSDARG00000061638 | dcakd | protein_coding | -0.8559429 | 2.6731254 | 0.00e+00 | 0.0000007 | 0.9479167 | 0.1000000 | 0.0999672 | 0.9391996 | TRUE | FALSE |
| 24:36452924-36524504:+ | ENSDARG00000101812 | zgc:114120 | protein_coding | 0.7117346 | 1.7438925 | 6.90e-06 | 0.0073400 | 0.5208333 | 0.1893392 | 0.1055305 | 0.9391996 | TRUE | FALSE |
| 24:37709191-37721739:+ | ENSDARG00000045257 | decr2 | protein_coding | -0.7645817 | 2.7648802 | 2.29e-05 | 0.0170458 | 0.5179436 | 0.1946060 | 0.1250607 | 0.9391996 | TRUE | FALSE |
| 24:36129216-36136401:- | ENSDARG00000114991 | tmem14cb | protein_coding | -1.5800575 | -0.1447143 | 0.00e+00 | 0.0000819 | 0.6805556 | 0.1000000 | 0.1702015 | 0.9391996 | TRUE | FALSE |
| 24:37484661-37531540:+ | ENSDARG00000079241 | wdr90 | protein_coding | -1.2100636 | 2.2651867 | 0.00e+00 | 0.0000873 | 0.7692294 | 0.1000000 | 0.1744485 | 0.9391996 | TRUE | FALSE |
| 24:38155830-38191962:+ | ENSDARG00000071460 | si:ch211-234p6.5 | protein_coding | -0.5346074 | 3.2638178 | 1.40e-06 | 0.0023490 | 0.5695445 | 0.1005581 | 0.2579232 | 0.9462021 | TRUE | FALSE |
| 24:39105051-39114123:+ | ENSDARG00000075463 | mss51 | protein_coding | -0.7148683 | 5.0518228 | 3.00e-06 | 0.0035998 | 0.8121222 | 0.1000000 | 0.2808165 | 0.9462021 | TRUE | FALSE |
| 24:39638555-39654132:+ | ENSDARG00000055903 | luc7l | protein_coding | 0.2276765 | 6.3022584 | 1.41e-05 | 0.0134942 | 0.8058615 | 0.1000000 | 0.3273303 | 0.9545611 | TRUE | FALSE |
| 24:38098896-38099675:- | ENSDARG00000093671 | BX001030.1 | unprocessed_pseudogene | 0.9300512 | 0.7739344 | 5.47e-05 | 0.0315333 | 0.6125000 | 0.1000000 | 0.3747940 | 0.9558181 | TRUE | FALSE |
res_A$mps %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_A$mps) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_A_mps_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
meta_B <- read_csv(here("data/metadata/sorl1.csv")) %>%
as.data.frame() %>%
dplyr::arrange(Run) %>%
mutate(
genotype = case_when(
Genotype == "wild-type" ~ "WT",
Genotype == "sorl1V1482Afs/+" ~ "V1482Afs_het",
Genotype == "sorl1R122Pfs/+" ~ "R122Pfs_het",
Genotype == "sorl1V1482Afs/R122Pfs" ~ "Trans",
),
genotype = factor(
genotype,
levels = c("WT", "V1482Afs_het", "R122Pfs_het", "Trans")
),
alias = c(
paste0(rep("WT", 8), seq(1, 8)),
paste0(rep("V1482Afs", 6), seq(1, 6)),
paste0(rep("R122Pfs", 4), seq(1, 4)),
paste0(rep("Trans", 6), seq(1, 6))
)
) %>%
dplyr::select(
sample = Run, genotype, genotype2 = Genotype, alias, gender, tank = Tank
)
dar_B <- read_rds(here("data/dar/dar_fixed_B.Rds")) %>%
sortSeqlevels() %>%
endoapply(sort)
gene_dar_B <- read_rds("data/dar/gene_dar_fixed_B.Rds") %>%
sortSeqlevels() %>%
endoapply(sort)
dge_B <- read_rds("data/de/dgeList_sorl1_cqn.Rds")
dge_B <- DGEList(
counts = dge_B$counts,
samples = dge_B$samples,
genes = dge_B$genes,
offset = dge_B$offset
)
dge_B <- dge_B %>%
estimateDisp(design = model.matrix(~0 + genotype, .$sample))
colnames(dge_B$design) <- str_remove_all(colnames(dge_B$design), "^genotype")
contrasts_B <- makeContrasts(
V1482Afs = V1482Afs_het - WT,
R122Pfs = R122Pfs_het - WT,
trans = Trans - WT,
levels = unique(dge_B$samples$genotype)
)
## Rerun the analysis using glmQL-Fits
res_B <- lapply(colnames(contrasts_B), function(x){
dge_B %>%
glmQLFit() %>%
glmQLFTest(contrast = contrasts_B[,x]) %>%
# glmFit() %>%
# glmLRT(contrast = contrasts_B[,x]) %>%
topTags(n = Inf) %>%
.[["table"]] %>%
makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
sortSeqlevels() %>%
sort()
}) %>%
set_names(colnames(contrasts_B))
lapply(names(res_B), function(x){
res_B[[x]] %>%
mcols() %>%
as_tibble() %>%
dplyr::select(
starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
ggplot(aes(dar, -log10(PValue))) +
geom_point() +
labs(title = x)
})
lapply(names(res_B), function(x){
res_B[[x]] %>%
mcols() %>%
as_tibble() %>%
dplyr::select(
starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar_q = cut(
dar,
breaks = quantile(dar, probs = seq(0, 1, length.out = 21), na.rm = TRUE),
# breaks = seq(0, 1, length.out = 7),
include.lowest = TRUE
) %>%
fct_na_value_to_level(levels(.)[[1]])
) %>%
ggplot(aes(PValue, after_stat(density))) +
geom_histogram(fill = "grey70", colour = "black", binwidth = 0.01) +
geom_line(
## Add 1 - dar^2 in red
aes(y = y, colour = f),
data = . %>%
pull(dar_q) %>%
levels() %>%
sapply(
\(x) {
min_dar = str_replace_all(x, "^[^0-9]([0-9\\.]+),.+", "\\1") %>% as.numeric()
tibble(
PValue = seq(0.01, 1, by = 0.01),
`1 - DAR` = dbeta(PValue, 1 - min_dar, 1),
`1 - DAR^2` = dbeta(PValue, 1 - min_dar^2, 1),
`sqrt(1 - DAR)` = dbeta(PValue, sqrt(1 - min_dar), 1),
)
},
simplify = FALSE
) %>%
bind_rows(.id = "dar_q") %>%
pivot_longer(
cols = contains("- DAR"), names_to = "f", values_to = "y"
) %>%
mutate(dar_q = fct_inorder(dar_q)),
) +
geom_label(
aes(x = 1, y = 0, label = label),
data = . %>%
summarise(n = dplyr::n(), .by = dar_q) %>%
mutate(label = paste("n =", comma(n))),
hjust = 1, vjust = 0, alpha = 0.7
) +
facet_wrap(~dar_q, scales = "free_y") +
labs(title = x)
})
beta_fits_B <- sapply(names(res_B), simplify = FALSE, function(x){
res_B[[x]] %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
dplyr::filter(!is.na(dar)) %>%
# mutate(dar_q = cut(dar, seq(0, 1, length.out = 5))) %>%
mutate(
dar_q = cut(
dar,
unique(quantile(dar, probs = seq(0, 1, length.out = 26))),
include.lowest = TRUE
)
) %>%
split(.$dar_q) %>%
lapply(
\(x) {
tryCatch({ # !!
fit <- MASS::fitdistr(x$PValue, "beta", list(shape1 = 1, shape2 = 1))
tibble(
dar = median(x$dar),
shape1 = fit$estimate["shape1"],
se1 = fit$sd["shape1"],
shape2 = fit$estimate["shape2"],
se2 = fit$sd["shape2"]
)
}, error = \(e){
tibble(
dar = median(x$dar), shape1 = NA, se1 = NA, shape2 = NA, se2 = NA
)
})
}
) %>%
bind_rows() %>%
mutate(shape1_norm = shape1 / max(shape1, na.rm = TRUE))
})
sapply(beta_fits_B, simplify = FALSE, function(x){
as.data.frame(x)
})
## $V1482Afs
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 1.2751730 0.07099322 1.0429773 0.05595571 0.8461148
## 2 0.02500000 1.3936937 0.07785129 1.1403642 0.06152825 0.9247568
## 3 0.03971648 1.3672888 0.07680465 1.0506778 0.05629477 0.9072364
## 4 0.05000000 1.3130354 0.07361347 1.0104407 0.05397625 0.8712377
## 5 0.05984432 1.3880126 0.07492623 1.0911469 0.05645766 0.9209872
## 6 0.06619048 1.3937882 0.08155919 1.0600039 0.05908188 0.9248195
## 7 0.07287461 1.3799631 0.07714644 1.1140163 0.05998785 0.9156462
## 8 0.07924107 1.5070921 0.08583149 1.0067926 0.05339759 1.0000000
## 9 0.08427940 1.2575298 0.06953154 1.1128890 0.06018940 0.8344081
## 10 0.09067460 1.4203479 0.07951842 1.1496767 0.06207586 0.9424427
## 11 0.09736395 1.3068775 0.06997319 1.1120042 0.05784762 0.8671517
## 12 0.10359808 1.2424118 0.07147307 1.1165337 0.06299918 0.8243768
## 13 0.10937500 1.3259063 0.07354046 1.1662442 0.06325244 0.8797779
## 14 0.11607143 1.4227808 0.07953142 1.1624677 0.06278027 0.9440570
## 15 0.12404018 1.2761821 0.07092913 1.0764197 0.05800066 0.8467844
## 16 0.13133252 1.3256420 0.07405507 1.0656200 0.05723004 0.8796025
## 17 0.14045905 1.3477115 0.07516093 1.1119188 0.05993992 0.8942462
## 18 0.14898941 1.3390293 0.07454245 1.1272703 0.06087993 0.8884854
## 19 0.15969259 1.1520174 0.06318908 1.0976725 0.05966611 0.7643975
## 20 0.17085777 1.1207193 0.06142753 1.0631675 0.05768938 0.7436303
## 21 0.18569303 1.1108075 0.06087304 1.0516427 0.05702763 0.7370535
## 22 0.20121094 1.2078274 0.06624716 1.1816014 0.06455384 0.8014290
## 23 0.22330659 1.1947044 0.06567932 1.1253326 0.06119095 0.7927216
## 24 0.26420878 1.2355606 0.06792342 1.1438837 0.06201628 0.8198308
## 25 0.35729167 0.8403017 0.04487922 0.9769802 0.05389404 0.5575649
##
## $R122Pfs
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 1.6787145 0.09746029 0.9905426 0.05253660 0.8204822
## 2 0.02500000 1.8614460 0.10899304 0.9400388 0.04903484 0.9097934
## 3 0.04687500 1.8396059 0.10803467 1.0486369 0.05621260 0.8991189
## 4 0.06187144 1.8234119 0.09742558 1.0052581 0.04866473 0.8912040
## 5 0.06770833 1.8194470 0.11522313 1.0163685 0.05846631 0.8892661
## 6 0.07502631 1.9315702 0.11645390 0.9962538 0.05391494 0.9440670
## 7 0.08280804 2.0460096 0.11966835 1.1084793 0.05903674 1.0000000
## 8 0.08928571 1.7448896 0.10236812 0.9403212 0.04957832 0.8528257
## 9 0.09679719 1.8904863 0.11064519 1.0385342 0.05517830 0.9239870
## 10 0.10437853 1.9057402 0.11159624 1.0322578 0.05474986 0.9314425
## 11 0.11215274 1.8691354 0.10897015 1.0587812 0.05633489 0.9135516
## 12 0.11954365 1.8411469 0.10800103 0.9825424 0.05190627 0.8998721
## 13 0.12500000 1.7973960 0.10426315 1.0718229 0.05717370 0.8784885
## 14 0.13299150 1.5969010 0.09260670 0.9435020 0.04983428 0.7804953
## 15 0.14259547 1.6762335 0.09777584 0.9535161 0.05041723 0.8192696
## 16 0.15291005 1.5496412 0.08936982 0.9723092 0.05162797 0.7573969
## 17 0.16445410 1.6950964 0.09788561 1.0663901 0.05702428 0.8284890
## 18 0.17678571 1.6190225 0.09323974 1.0341895 0.05517637 0.7913074
## 19 0.18992419 1.4388561 0.08313069 0.8768443 0.04614133 0.7032499
## 20 0.20838392 1.5563249 0.08967183 0.9945509 0.05297371 0.7606635
## 21 0.23405704 1.4337323 0.08160129 1.0200680 0.05463318 0.7007456
## 22 0.26078869 1.5051914 0.08621486 1.0090487 0.05385909 0.7356717
## 23 0.30000000 1.2954290 0.07324948 0.9674776 0.05178135 0.6331490
## 24 0.35375616 1.0526885 0.05848550 0.8932853 0.04797795 0.5145081
## 25 0.48214286 0.8205036 0.04448293 0.8205841 0.04448828 0.4010263
##
## $trans
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 1.3040392 0.07306468 1.0488687 0.05647862 0.8450706
## 2 0.02678571 1.2761967 0.07205179 0.9834770 0.05287096 0.8270275
## 3 0.04166667 1.4429798 0.08179030 1.0656314 0.05727570 0.9351098
## 4 0.05305848 1.3328594 0.07505164 1.0500585 0.05662896 0.8637472
## 5 0.06250000 1.3280911 0.07472444 1.0237714 0.05492394 0.8606572
## 6 0.06944444 1.5002304 0.08551505 1.0799902 0.05817538 0.9722105
## 7 0.07619048 1.4726310 0.08371394 1.0676040 0.05738710 0.9543249
## 8 0.08333333 1.4704375 0.08342671 1.0896881 0.05870633 0.9529034
## 9 0.08918100 1.4143899 0.07988385 1.0830002 0.05836860 0.9165824
## 10 0.09577922 1.5431127 0.08775136 1.1259488 0.06073248 1.0000000
## 11 0.10182073 1.4216419 0.08033943 1.0718892 0.05764058 0.9212820
## 12 0.10819293 1.3487550 0.07615618 1.0434054 0.05624395 0.8740483
## 13 0.11479167 1.5076057 0.08625771 1.0227050 0.05466285 0.9769900
## 14 0.12267019 1.4733556 0.08315171 1.1521079 0.06237518 0.9547945
## 15 0.12951128 1.1702214 0.06585581 0.8965300 0.04784291 0.7583512
## 16 0.13824405 1.4339779 0.08129812 1.0652680 0.05732156 0.9292762
## 17 0.14757456 1.2709339 0.07137870 0.9973728 0.05352830 0.8236170
## 18 0.15749150 1.2612978 0.07056222 1.0643976 0.05772051 0.8173724
## 19 0.16828901 1.2920545 0.07283397 0.9967640 0.05353569 0.8373040
## 20 0.18092991 1.1840186 0.06615148 0.9637927 0.05174080 0.7672923
## 21 0.19488989 1.2603250 0.07083817 0.9877302 0.05302405 0.8167420
## 22 0.21232796 1.1021508 0.06129986 0.9570211 0.05175474 0.7142387
## 23 0.23822763 1.1372496 0.06272236 1.0725716 0.05850255 0.7369841
## 24 0.28149004 0.9665891 0.05280483 0.9570122 0.05217400 0.6263892
## 25 0.40000000 0.6670882 0.03525290 0.7918470 0.04361225 0.4323003
plotly::ggplotly({
plotDarECDF(dar = dar_B$V1482Afs, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_B$V1482Afs) %>%
lm(shape1 ~ dar, data = beta_fits_B$V1482Afs) %>%
step() %>%
summary()
## Start: AIC=-118.73
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.18447 -118.729
## - dar 1 0.25571 0.44017 -98.987
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_B$V1482Afs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17147 -0.06989 0.01897 0.04931 0.16320
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.44665 0.03365 42.987 < 2e-16 ***
## dar -1.29680 0.22966 -5.646 9.53e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08956 on 23 degrees of freedom
## Multiple R-squared: 0.5809, Adjusted R-squared: 0.5627
## F-statistic: 31.88 on 1 and 23 DF, p-value: 9.525e-06
lm_B_V1482Afs <- function(dar){
# 1.46331 - 1.41616 * dar
1.52192 - 1.64891 * dar
}
beta_fits_B$V1482Afs %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_B_V1482Afs(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_B_V1482Afs <- res_B$V1482Afs %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$V1482Afs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_V1482Afs(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
kable(darP_B_V1482Afs) %>%
kable_paper() %>%
column_spec(
1:ncol(darP_B_V1482Afs),
background = ifelse(darP_B_V1482Afs$isDE, "green4", "red3")
)
| range | gene_id | gene_name | gene_biotype | logFC | logCPM | PValue | FDR | dar | alpha | darP | FDR_darP | wasDE | isDE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 15:20403084-20405796:+ | ENSDARG00000069464 | cox7a1 | protein_coding | -2.241106 | 1.013476 | 2e-07 | 0.003347 | 0.4938229 | 0.7076505 | 1.78e-05 | 0.306842 | TRUE | FALSE |
res_B$V1482Afs %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$V1482Afs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_V1482Afs(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_B$V1482Afs) %>%
lm(shape1_norm ~ dar, data = beta_fits_B$V1482Afs) %>%
step() %>%
summary()
## Start: AIC=-139.24
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.081215 -139.24
## - dar 1 0.11258 0.193795 -119.50
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_B$V1482Afs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.11378 -0.04637 0.01259 0.03272 0.10829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.95989 0.02233 42.987 < 2e-16 ***
## dar -0.86046 0.15239 -5.646 9.53e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05942 on 23 degrees of freedom
## Multiple R-squared: 0.5809, Adjusted R-squared: 0.5627
## F-statistic: 31.88 on 1 and 23 DF, p-value: 9.525e-06
lm_B_V1482Afs_norm <- function(dar){
1.12122 - 1.08509 * dar
}
dar_tmp <- dar_B$V1482Afs
lm_all <- lm_all %>%
rbind(tibble(
dataset = "B_V1482Afs",
intercept = 1.12122,
slope = -1.08509,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_B$V1482Afs %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_B_V1482Afs_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_B_V1482Afs_norm <- res_B$V1482Afs %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$V1482Afs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_V1482Afs_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
kable(darP_B_V1482Afs_norm) %>%
kable_paper() %>%
column_spec(
1:ncol(darP_B_V1482Afs_norm),
background = ifelse(darP_B_V1482Afs_norm$isDE, "green4", "red3")
)
| range | gene_id | gene_name | gene_biotype | logFC | logCPM | PValue | FDR | dar | alpha | darP | FDR_darP | wasDE | isDE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 15:20403084-20405796:+ | ENSDARG00000069464 | cox7a1 | protein_coding | -2.241106 | 1.013476 | 2e-07 | 0.003347 | 0.4938229 | 0.5853777 | 0.0001177 | 0.5997684 | TRUE | FALSE |
res_B$V1482Afs %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$V1482Afs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_V1482Afs_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_B$R122Pfs, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_B$R122Pfs) %>%
lm(shape1 ~ dar, data = beta_fits_B$R122Pfs) %>%
step() %>%
summary()
## Start: AIC=-104.18
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.33005 -104.185
## - dar 1 1.593 1.92307 -62.124
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_B$R122Pfs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33278 -0.06104 -0.00661 0.09750 0.23034
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.01150 0.04173 48.21 < 2e-16 ***
## dar -2.36484 0.22445 -10.54 2.83e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1198 on 23 degrees of freedom
## Multiple R-squared: 0.8284, Adjusted R-squared: 0.8209
## F-statistic: 111 on 1 and 23 DF, p-value: 2.827e-10
lm_B_R122Pfs <- function(dar){
2.00307 - 2.41516 * dar
}
beta_fits_B$R122Pfs %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_B_R122Pfs(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_B_R122Pfs <- res_B$R122Pfs %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$R122Pfs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_R122Pfs(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
kable(darP_B_R122Pfs) %>%
kable_paper() %>%
column_spec(
1:ncol(darP_B_R122Pfs),
background = ifelse(darP_B_R122Pfs$isDE, "green4", "red3")
)
| range | gene_id | gene_name | gene_biotype | logFC | logCPM | PValue | FDR | dar | alpha | darP | FDR_darP | wasDE | isDE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 15:14109652-14120431:+ | ENSDARG00000016290 | clca1 | protein_coding | -8.395536 | 4.148533 | 0.0e+00 | 0.0005694 | 0.000000 | 1.0000000 | 0.00e+00 | 0.0005694 | TRUE | TRUE |
| 15:17030473-17048822:+ | ENSDARG00000042332 | plin2 | protein_coding | -2.610720 | 2.614403 | 3.2e-06 | 0.0278195 | 0.000000 | 1.0000000 | 3.20e-06 | 0.0278195 | TRUE | TRUE |
| 15:21521803-21540276:- | ENSDARG00000097580 | BX663519.1 | processed_transcript | -3.524666 | 1.196269 | 7.9e-06 | 0.0455044 | 0.468254 | 0.8721617 | 3.55e-05 | 0.2042783 | TRUE | FALSE |
res_B$R122Pfs %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$R122Pfs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_R122Pfs(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_B$R122Pfs) %>%
lm(shape1_norm ~ dar, data = beta_fits_B$R122Pfs) %>%
step() %>%
summary()
## Start: AIC=-139.98
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.07884 -139.980
## - dar 1 0.38055 0.45939 -97.918
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_B$R122Pfs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.162649 -0.029833 -0.003233 0.047653 0.112581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.98313 0.02039 48.21 < 2e-16 ***
## dar -1.15583 0.10970 -10.54 2.83e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05855 on 23 degrees of freedom
## Multiple R-squared: 0.8284, Adjusted R-squared: 0.8209
## F-statistic: 111 on 1 and 23 DF, p-value: 2.827e-10
lm_B_R122Pfs_norm <- function(dar){
1.12165 - 1.35240 * dar
}
dar_tmp <- dar_B$R122Pfs
lm_all <- lm_all %>%
rbind(tibble(
dataset = "B_R122Pfs",
intercept = 1.12165,
slope = -1.35240,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_B$R122Pfs %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_B_R122Pfs_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_B_R122Pfs_norm <- res_B$R122Pfs %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$R122Pfs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_R122Pfs_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
kable(darP_B_R122Pfs_norm) %>%
kable_paper() %>%
column_spec(
1:ncol(darP_B_R122Pfs_norm),
background = ifelse(darP_B_R122Pfs_norm$isDE, "green4", "red3")
)
| range | gene_id | gene_name | gene_biotype | logFC | logCPM | PValue | FDR | dar | alpha | darP | FDR_darP | wasDE | isDE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 15:14109652-14120431:+ | ENSDARG00000016290 | clca1 | protein_coding | -8.395536 | 4.148533 | 0.0e+00 | 0.0005694 | 0.000000 | 1.0000000 | 0.0000000 | 0.0005694 | TRUE | TRUE |
| 15:17030473-17048822:+ | ENSDARG00000042332 | plin2 | protein_coding | -2.610720 | 2.614403 | 3.2e-06 | 0.0278195 | 0.000000 | 1.0000000 | 0.0000032 | 0.0278195 | TRUE | TRUE |
| 15:21521803-21540276:- | ENSDARG00000097580 | BX663519.1 | processed_transcript | -3.524666 | 1.196269 | 7.9e-06 | 0.0455044 | 0.468254 | 0.4883833 | 0.0032248 | 0.9999337 | TRUE | FALSE |
res_B$R122Pfs %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$R122Pfs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_R122Pfs_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_B$trans, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_B$trans) %>%
lm(shape1 ~ dar, data = beta_fits_B$trans) %>%
step() %>%
summary()
## Start: AIC=-105.46
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.31365 -105.459
## - dar 1 0.60003 0.91368 -80.729
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_B$trans)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.238107 -0.065328 0.004621 0.071437 0.174576
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.54215 0.04292 35.931 < 2e-16 ***
## dar -1.81260 0.27326 -6.633 9.1e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1168 on 23 degrees of freedom
## Multiple R-squared: 0.6567, Adjusted R-squared: 0.6418
## F-statistic: 44 on 1 and 23 DF, p-value: 9.097e-07
lm_B_trans <- function(dar){
1.58624 - 1.95257 * dar
}
beta_fits_B$trans %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_B_trans(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_B_trans <- res_B$trans %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$trans) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_trans(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
res_B$trans %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$trans) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_trans(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_B$trans) %>%
lm(shape1_norm ~ dar, data = beta_fits_B$trans) %>%
step() %>%
summary()
## Start: AIC=-127.15
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.13172 -127.15
## - dar 1 0.25199 0.38371 -102.42
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_B$trans)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.154303 -0.042335 0.002995 0.046294 0.113133
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.99937 0.02781 35.931 < 2e-16 ***
## dar -1.17464 0.17708 -6.633 9.1e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07568 on 23 degrees of freedom
## Multiple R-squared: 0.6567, Adjusted R-squared: 0.6418
## F-statistic: 44 on 1 and 23 DF, p-value: 9.097e-07
lm_B_trans_norm <- function(dar){
1.14804 - 1.41317 * dar
}
dar_tmp <- dar_B$trans
lm_all <- lm_all %>%
rbind(tibble(
dataset = "B_trans",
intercept = 1.14804,
slope = -1.41317,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_B$trans %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_B_trans_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_B_trans_norm <- res_B$trans %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$trans) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_trans_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
res_B$trans %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_B$trans) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_B_trans_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
meta_C <- read_csv(here("data/metadata/psen1.csv")) %>%
dplyr::select(-sample) %>%
dplyr::rename(sample = basename, genotype = Genotype) %>%
## We need some sample aliases that follow R naming conventions
mutate(
alias = c(
paste0(rep("fAD", 7), seq(1, 7)),
paste0(rep("fAI", 8), seq(1, 8)),
paste0(rep("wt", 9), seq(1, 9))
),
group = genotype
)
meta_C$genotype <- fct_relevel(
meta_C$genotype,
c("WT", "EOfAD-like/+", "fAI-like/+")
)
dar_C <- read_rds(here("data/dar/dar_fixed_C.Rds")) %>%
sortSeqlevels() %>%
endoapply(sort)
gene_dar_C <- read_rds("data/dar/gene_dar_fixed_C.Rds") %>%
sortSeqlevels() %>%
endoapply(sort)
dge_C <- read_rds("data/de/dgeList_psen1.Rds")
dge_C$samples <- dge_C$samples %>%
mutate(
genotype = case_when(
genotype == "fAI-like/+" ~ "fai",
genotype == "EOfAD-like/+" ~ "eofad",
genotype == "WT" ~ "wt"
),
genotype = factor(genotype, levels = c("wt", "eofad", "fai"))
)
dge_C <- DGEList(
counts = dge_C$counts,
samples = dge_C$samples,
genes = dge_C$genes
)
dge_C <- dge_C %>%
estimateDisp(design = model.matrix(~0 + genotype, .$sample))
colnames(dge_C$design) <- str_remove_all(colnames(dge_C$design), "^genotype")
contrasts_C <- makeContrasts(
eofad = fai - wt,
fai = eofad - wt,
levels = unique(dge_C$samples$genotype)
)
## Rerun the analysis using glmQL-Fits
res_C <- lapply(colnames(contrasts_C), function(x){
dge_C %>%
glmQLFit() %>%
glmQLFTest(contrast = contrasts_C[,x]) %>%
topTags(n = Inf) %>%
.[["table"]] %>%
makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
sortSeqlevels() %>%
sort()
}) %>%
set_names(colnames(contrasts_C))
lapply(names(res_C), function(x){
res_C[[x]] %>%
mcols() %>%
as_tibble() %>%
dplyr::select(
starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_C[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
ggplot(aes(dar, -log10(PValue))) +
geom_point() +
labs(title = x)
})
lapply(names(res_C), function(x){
res_C[[x]] %>%
mcols() %>%
as_tibble() %>%
dplyr::select(
starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_C[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar_q = cut(
dar,
breaks = quantile(dar, probs = seq(0, 1, length.out = 21), na.rm = TRUE),
# breaks = seq(0, 1, length.out = 7),
include.lowest = TRUE
) %>%
fct_na_value_to_level(levels(.)[[1]])
) %>%
ggplot(aes(PValue, after_stat(density))) +
geom_histogram(fill = "grey70", colour = "black", binwidth = 0.01) +
geom_line(
## Add 1 - dar^2 in red
aes(y = y, colour = f),
data = . %>%
pull(dar_q) %>%
levels() %>%
sapply(
\(x) {
min_dar = str_replace_all(x, "^[^0-9]([0-9\\.]+),.+", "\\1") %>% as.numeric()
tibble(
PValue = seq(0.01, 1, by = 0.01),
`1 - DAR` = dbeta(PValue, 1 - min_dar, 1),
`1 - DAR^2` = dbeta(PValue, 1 - min_dar^2, 1),
`sqrt(1 - DAR)` = dbeta(PValue, sqrt(1 - min_dar), 1),
)
},
simplify = FALSE
) %>%
bind_rows(.id = "dar_q") %>%
pivot_longer(
cols = contains("- DAR"), names_to = "f", values_to = "y"
) %>%
mutate(dar_q = fct_inorder(dar_q)),
) +
geom_label(
aes(x = 1, y = 0, label = label),
data = . %>%
summarise(n = dplyr::n(), .by = dar_q) %>%
mutate(label = paste("n =", comma(n))),
hjust = 1, vjust = 0, alpha = 0.7
) +
facet_wrap(~dar_q, scales = "free_y") +
labs(title = x)
})
beta_fits_C <- sapply(names(res_C), simplify = FALSE, function(x){
# browser()
res_C[[x]] %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_C[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
dplyr::filter(!is.na(dar)) %>%
# mutate(dar_q = cut(dar, seq(0, 1, length.out = 5))) %>%
mutate(
dar_q = cut(
dar,
unique(quantile(dar, probs = seq(0, 1, length.out = 26))),
include.lowest = TRUE
)
) %>%
split(.$dar_q) %>%
lapply(
\(x) {
tryCatch({ # !!
fit <- MASS::fitdistr(x$PValue, "beta", list(shape1 = 1, shape2 = 1))
tibble(
dar = median(x$dar),
shape1 = fit$estimate["shape1"],
se1 = fit$sd["shape1"],
shape2 = fit$estimate["shape2"],
se2 = fit$sd["shape2"]
)
}, error = \(e){
tibble(
dar = median(x$dar), shape1 = NA, se1 = NA, shape2 = NA, se2 = NA
)
})
}
) %>%
bind_rows() %>%
mutate(shape1_norm = shape1 / max(shape1, na.rm = TRUE))
})
sapply(beta_fits_C, simplify = FALSE, function(x){
as.data.frame(x)
})
## $eofad
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 1.0516492 0.03379651 1.0469804 0.03361715 0.8726949
## 2 0.01091588 1.2050593 0.05501863 1.2195521 0.05579869 1.0000000
## 3 0.01984127 1.0062750 0.04575874 0.9728413 0.04393538 0.8350419
## 4 0.02821869 1.0223467 0.04540182 1.0205202 0.04530455 0.8483787
## 5 0.03556463 0.9607481 0.04419799 1.0342756 0.04829636 0.7972620
## 6 0.04149300 1.0918347 0.04966723 1.1055569 0.05041100 0.9060423
## 7 0.04728401 0.9796968 0.04402270 1.0893014 0.04998645 0.8129864
## 8 0.05294419 0.9752321 0.04269245 1.0216589 0.04514764 0.8092814
## 9 0.05839593 1.0292494 0.04799304 1.1055744 0.05227203 0.8541068
## 10 0.06349206 0.9262727 0.04161233 0.9965386 0.04545224 0.7686532
## 11 0.06854257 0.9905245 0.04378336 1.0992170 0.04959329 0.8219716
## 12 0.07397548 0.9080890 0.04126998 1.0479619 0.04904841 0.7535637
## 13 0.07975834 0.9595551 0.04286855 1.0412356 0.04729226 0.7962721
## 14 0.08627932 0.9489121 0.04295761 1.0222127 0.04698381 0.7874402
## 15 0.09308390 1.0211514 0.04602375 1.1189878 0.05133479 0.8473869
## 16 0.10000000 0.9341859 0.04171355 0.9812842 0.04426623 0.7752199
## 17 0.10714286 0.9356994 0.04202065 1.1394177 0.05320668 0.7764758
## 18 0.11528415 0.9808316 0.04415574 1.0722643 0.04913533 0.8139281
## 19 0.12500000 0.9316319 0.04172656 1.0476917 0.04806032 0.7731005
## 20 0.13640782 0.9029964 0.03972615 1.0551631 0.04792830 0.7493377
## 21 0.15000000 0.9170364 0.04155176 1.0458633 0.04868210 0.7609886
## 22 0.16666667 0.9950321 0.04448694 1.2212485 0.05676706 0.8257121
## 23 0.19107143 0.9207534 0.04121691 1.0350035 0.04745662 0.7640731
## 24 0.24183187 0.8603681 0.03817618 1.0426690 0.04816242 0.7139632
##
## $fai
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 1.1486138 0.03764128 0.9516062 0.03005250 1.0000000
## 2 0.01428571 1.0957708 0.05070408 0.9068342 0.04038256 0.9539941
## 3 0.02262586 1.1195180 0.05192515 0.9154556 0.04077944 0.9746687
## 4 0.02922403 1.1261794 0.05190010 0.9835558 0.04414200 0.9804683
## 5 0.03639991 1.1342095 0.05263045 0.9266241 0.04130143 0.9874594
## 6 0.04394514 1.0312479 0.04733179 0.9121617 0.04082096 0.8978195
## 7 0.05169753 1.0489497 0.04822769 0.9221756 0.04129802 0.9132310
## 8 0.05837083 1.0815635 0.04958688 0.9870004 0.04443917 0.9416250
## 9 0.06548263 0.8891103 0.04019420 0.8805064 0.03972118 0.7740724
## 10 0.07232824 1.0177335 0.04662505 0.9141083 0.04095848 0.8860537
## 11 0.07893805 0.9467241 0.04316280 0.8771434 0.03934318 0.8242319
## 12 0.08541667 1.0239182 0.04659356 0.9971198 0.04513316 0.8914381
## 13 0.09189714 0.9965465 0.04542865 0.9448180 0.04260047 0.8676080
## 14 0.09925832 0.9790886 0.04452669 0.9401230 0.04239619 0.8524089
## 15 0.10698578 0.9750491 0.04433896 0.9102661 0.04080082 0.8488920
## 16 0.11548764 0.9898035 0.04513674 0.9585039 0.04342190 0.8617374
## 17 0.12447222 0.9363662 0.04257554 0.8895555 0.04000652 0.8152141
## 18 0.13414386 0.9813786 0.04408310 0.9232305 0.04094681 0.8544026
## 19 0.14372777 1.0465242 0.04866822 0.9532246 0.04349996 0.9111193
## 20 0.15502476 0.9670394 0.04378116 0.9740016 0.04416152 0.8419186
## 21 0.17061408 0.9559307 0.04330687 0.9564211 0.04333370 0.8322473
## 22 0.19311869 0.9562792 0.04336182 0.9399217 0.04246684 0.8325507
## 23 0.22261241 1.0061136 0.04588987 0.9518935 0.04292702 0.8759373
## 24 0.31047012 1.0423455 0.04777231 0.9406446 0.04222044 0.9074813
plotly::ggplotly({
plotDarECDF(dar = dar_C$eofad, dar_val = "origin")
})
lm(shape1 ~ dar + I(dar^2), data = beta_fits_C$eofad) %>%
# lm(shape1 ~ dar, data = beta_fits_C$eofad) %>%
step() %>%
summary()
## Start: AIC=-137.41
## shape1 ~ dar + I(dar^2)
##
## Df Sum of Sq RSS AIC
## <none> 0.060981 -137.41
## - I(dar^2) 1 0.0058392 0.066820 -137.21
## - dar 1 0.0210234 0.082004 -132.30
##
## Call:
## lm(formula = shape1 ~ dar + I(dar^2), data = beta_fits_C$eofad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.07043 -0.02797 -0.01275 0.02135 0.14485
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.07728 0.02901 37.133 <2e-16 ***
## dar -1.60333 0.59588 -2.691 0.0137 *
## I(dar^2) 3.62761 2.55818 1.418 0.1708
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05389 on 21 degrees of freedom
## Multiple R-squared: 0.4876, Adjusted R-squared: 0.4388
## F-statistic: 9.992 on 2 and 21 DF, p-value: 0.0008932
lm_C_eofad <- function(dar){
1.03004 - 0.77544 * dar
}
beta_fits_C$eofad %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_C_eofad(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_C_eofad <- res_C$eofad %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_C$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_C_eofad(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
kable(darP_C_eofad) %>%
kable_paper() %>%
column_spec(
1:ncol(darP_C_eofad),
background = ifelse(darP_C_eofad$isDE, "green4", "red3")
)
| range | gene_id | gene_name | gene_biotype | logFC | logCPM | PValue | FDR | dar | alpha | darP | FDR_darP | wasDE | isDE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 17:51206225-51224800:- | ENSDARG00000004870 | psen1 | protein_coding | -0.7621412 | 5.3665843 | 0.0e+00 | 0.0000000 | 0.3305692 | 0.7737035 | 0.00e+00 | 0.0000000 | TRUE | TRUE |
| 17:38738840-38778901:- | ENSDARG00000005179 | dglucy | protein_coding | -0.9308747 | 5.0312423 | 3.9e-06 | 0.0425888 | 0.0380146 | 1.0000000 | 3.90e-06 | 0.0425888 | TRUE | TRUE |
| 17:5611166-5622440:+ | ENSDARG00000067665 | fam167aa | protein_coding | -4.7628058 | 0.2296959 | 6.0e-06 | 0.0435244 | 0.1012693 | 0.9515118 | 1.07e-05 | 0.0779892 | TRUE | FALSE |
res_C$eofad %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_C$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_C_eofad(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_C$eofad) %>%
# lm(shape1_norm ~ dar, data = beta_fits_C$eofad) %>%
step() %>%
summary()
## Start: AIC=-146.36
## shape1_norm ~ dar + I(dar^2)
##
## Df Sum of Sq RSS AIC
## <none> 0.041993 -146.36
## - I(dar^2) 1 0.004021 0.046014 -146.16
## - dar 1 0.014477 0.056470 -141.25
##
## Call:
## lm(formula = shape1_norm ~ dar + I(dar^2), data = beta_fits_C$eofad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.05845 -0.02321 -0.01058 0.01772 0.12020
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.89396 0.02407 37.133 <2e-16 ***
## dar -1.33050 0.49448 -2.691 0.0137 *
## I(dar^2) 3.01032 2.12286 1.418 0.1708
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04472 on 21 degrees of freedom
## Multiple R-squared: 0.4876, Adjusted R-squared: 0.4388
## F-statistic: 9.992 on 2 and 21 DF, p-value: 0.0008932
lm_C_eofad_norm <- function(dar){
1.08284 - 0.81518 * dar
}
dar_tmp <- dar_C$eofad
lm_all <- lm_all %>%
rbind(tibble(
dataset = "C_eofad",
intercept = 1.08284,
slope = -0.81518,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_C$eofad %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_C_eofad_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_C_eofad_norm <- res_C$eofad %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_C$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_C_eofad_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
kable(darP_C_eofad_norm) %>%
kable_paper() %>%
column_spec(
1:ncol(darP_C_eofad_norm),
background = ifelse(darP_C_eofad_norm$isDE, "green4", "red3")
)
| range | gene_id | gene_name | gene_biotype | logFC | logCPM | PValue | FDR | dar | alpha | darP | FDR_darP | wasDE | isDE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 17:51206225-51224800:- | ENSDARG00000004870 | psen1 | protein_coding | -0.7621412 | 5.3665843 | 0.0e+00 | 0.0000000 | 0.3305692 | 0.8133666 | 0.0e+00 | 0.0000000 | TRUE | TRUE |
| 17:38738840-38778901:- | ENSDARG00000005179 | dglucy | protein_coding | -0.9308747 | 5.0312423 | 3.9e-06 | 0.0425888 | 0.0380146 | 1.0000000 | 3.9e-06 | 0.0425888 | TRUE | TRUE |
| 17:5611166-5622440:+ | ENSDARG00000067665 | fam167aa | protein_coding | -4.7628058 | 0.2296959 | 6.0e-06 | 0.0435244 | 0.1012693 | 1.0000000 | 6.0e-06 | 0.0435244 | TRUE | TRUE |
res_C$eofad %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_C$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_C_eofad_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_C$fai, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_C$fai) %>%
lm(shape1 ~ dar, data = beta_fits_C$fai) %>%
step() %>%
summary()
## Start: AIC=-131.06
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.086348 -131.06
## - dar 1 0.023431 0.109779 -127.30
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_C$fai)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.14715 -0.03109 -0.01499 0.04277 0.11335
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.06493 0.02218 48.021 <2e-16 ***
## dar -0.43783 0.17920 -2.443 0.023 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06265 on 22 degrees of freedom
## Multiple R-squared: 0.2134, Adjusted R-squared: 0.1777
## F-statistic: 5.97 on 1 and 22 DF, p-value: 0.02304
lm_C_fai <- function(dar){
1.27179 - 1.70850 * dar
}
beta_fits_C$fai %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_C_fai(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_C_fai <- res_C$fai %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_C$fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_C_fai(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
kable(darP_C_fai) %>%
kable_paper() %>%
column_spec(
1:ncol(darP_C_fai),
background = ifelse(darP_C_fai$isDE, "green4", "red3")
)
| range | gene_id | gene_name | gene_biotype | logFC | logCPM | PValue | FDR | dar | alpha | darP | FDR_darP | wasDE | isDE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 17:49785495-49978986:- | ENSDARG00000078322 | col12a1a | protein_coding | 1.9045922 | 3.854349 | 0.0e+00 | 0.0000195 | 0.2215742 | 0.8932305 | 0.00e+00 | 0.0001801 | TRUE | TRUE |
| 17:50413084-50422584:- | ENSDARG00000093378 | si:ch211-235i11.5 | protein_coding | 0.6341595 | 3.057743 | 1.0e-07 | 0.0007415 | 0.0677979 | 1.0000000 | 1.00e-07 | 0.0007415 | TRUE | TRUE |
| 7:32901658-32924520:+ | ENSDARG00000079324 | ano9b | protein_coding | 1.4319607 | -1.240503 | 1.5e-06 | 0.0100260 | 0.0669498 | 1.0000000 | 1.50e-06 | 0.0106873 | TRUE | TRUE |
| 17:51224421-51229737:+ | ENSDARG00000020448 | adi1 | protein_coding | 0.6019222 | 2.881561 | 1.8e-06 | 0.0100260 | 0.3331819 | 0.7025487 | 9.33e-05 | 0.2268036 | TRUE | FALSE |
res_C$fai %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_C$fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_C_fai(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_C$fai) %>%
lm(shape1_norm ~ dar, data = beta_fits_C$fai) %>%
step() %>%
summary()
## Start: AIC=-137.71
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.065449 -137.71
## - dar 1 0.01776 0.083209 -133.95
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_C$fai)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.12811 -0.02707 -0.01305 0.03724 0.09868
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.92715 0.01931 48.021 <2e-16 ***
## dar -0.38118 0.15601 -2.443 0.023 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05454 on 22 degrees of freedom
## Multiple R-squared: 0.2134, Adjusted R-squared: 0.1777
## F-statistic: 5.97 on 1 and 22 DF, p-value: 0.02304
lm_C_fai_norm <- function(dar){
1.1434 - 1.5360 * dar
}
dar_tmp <- dar_C$fai
lm_all <- lm_all %>%
rbind(tibble(
dataset = "C_fai",
intercept = 1.1434,
slope = -1.5360,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_C$fai %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_C_fai_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_C_fai_norm <- res_C$fai %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_C$fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_C_fai_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
kable(darP_C_fai_norm) %>%
kable_paper() %>%
column_spec(
1:ncol(darP_C_fai_norm),
background = ifelse(darP_C_fai_norm$isDE, "green4", "red3")
)
| range | gene_id | gene_name | gene_biotype | logFC | logCPM | PValue | FDR | dar | alpha | darP | FDR_darP | wasDE | isDE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 17:49785495-49978986:- | ENSDARG00000078322 | col12a1a | protein_coding | 1.9045922 | 3.854349 | 0.0e+00 | 0.0000195 | 0.2215742 | 0.8030620 | 1.00e-07 | 0.0007415 | TRUE | TRUE |
| 17:50413084-50422584:- | ENSDARG00000093378 | si:ch211-235i11.5 | protein_coding | 0.6341595 | 3.057743 | 1.0e-07 | 0.0007415 | 0.0677979 | 1.0000000 | 1.00e-07 | 0.0007415 | TRUE | TRUE |
| 7:32901658-32924520:+ | ENSDARG00000079324 | ano9b | protein_coding | 1.4319607 | -1.240503 | 1.5e-06 | 0.0100260 | 0.0669498 | 1.0000000 | 1.50e-06 | 0.0106873 | TRUE | TRUE |
| 17:51224421-51229737:+ | ENSDARG00000020448 | adi1 | protein_coding | 0.6019222 | 2.881561 | 1.8e-06 | 0.0100260 | 0.3331819 | 0.6316326 | 2.38e-04 | 0.4003521 | TRUE | FALSE |
res_C$fai %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_C$fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_C_fai_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
meta_D <- read_tsv(here("data/metadata/apoe_synapse.tsv")) %>%
left_join(read_csv(here("data/metadata/apoe.csv"))) %>%
dplyr::select(
sample = specimenID, species, genotypeBackground, litter, dateBirth,
dateDeath, genotype = Genotype, sex = Sex, age = Age, lane, basename = name,
modelSystemName, individualID, study
) %>%
dplyr::filter(str_detect(sample, "_3M_")) %>%
mutate(basename = str_remove(basename, ".bam_R(1|2).fastq.gz")) %>%
distinct(sample, .keep_all = TRUE) %>%
mutate(
group = as.factor(paste0(genotype, "_", age, "_", sex)),
genotype = as.factor(genotype)
) %>%
dplyr::arrange(genotype, group)
dar_D <- read_rds(here("data/dar/dar_loci_D.Rds")) %>% # !!
sortSeqlevels() %>%
endoapply(sort)
gene_dar_D <- read_rds("data/dar/gene_dar_loci_D.Rds") %>% # !!
sortSeqlevels() %>%
endoapply(sort)
dge_D <- read_rds("data/de/dgeList_apoe.Rds")
dge_D <- DGEList(
counts = dge_D$counts,
samples = dge_D$samples,
genes = dge_D$genes
)
dge_D <- dge_D %>%
estimateDisp(design = model.matrix(~0 + group, .$sample))
colnames(dge_D$design) <- str_remove_all(colnames(dge_D$design), "^group")
contrasts_D <- makeContrasts(
apoe2v3_female = APOE2_3M_female - APOE3_3M_female,
apoe2v3_male = APOE2_3M_male - APOE3_3M_male,
apoe4v3_female = APOE4_3M_female - APOE3_3M_female,
apoe4v3_male = APOE4_3M_male - APOE3_3M_male,
levels = dge_D$samples$group
)
## Rerun the analysis using glmQL-Fits
res_D <- lapply(colnames(contrasts_D), function(x){
dge_D %>%
glmQLFit() %>%
glmQLFTest(contrast = contrasts_D[,x]) %>%
topTags(n = Inf) %>%
.[["table"]] %>%
makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
sortSeqlevels() %>%
sort()
}) %>%
set_names(colnames(contrasts_D))
lapply(names(res_D), function(x){
res_D[[x]] %>%
mcols() %>%
as_tibble() %>%
dplyr::select(
starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
ggplot(aes(dar, -log10(PValue))) +
geom_point() +
labs(title = x)
})
lapply(names(res_D), function(x){
res_D[[x]] %>%
mcols() %>%
as_tibble() %>%
dplyr::select(
starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar_q = cut(
dar,
breaks = quantile(dar, probs = seq(0, 1, length.out = 21), na.rm = TRUE),
# breaks = seq(0, 1, length.out = 7),
include.lowest = TRUE
) %>%
fct_na_value_to_level(levels(.)[[1]])
) %>%
ggplot(aes(PValue, after_stat(density))) +
geom_histogram(fill = "grey70", colour = "black", binwidth = 0.01) +
geom_line(
## Add 1 - dar^2 in red
aes(y = y, colour = f),
data = . %>%
pull(dar_q) %>%
levels() %>%
sapply(
\(x) {
min_dar = str_replace_all(x, "^[^0-9]([0-9\\.]+),.+", "\\1") %>% as.numeric()
tibble(
PValue = seq(0.01, 1, by = 0.01),
`1 - DAR` = dbeta(PValue, 1 - min_dar, 1),
`1 - DAR^2` = dbeta(PValue, 1 - min_dar^2, 1),
`sqrt(1 - DAR)` = dbeta(PValue, sqrt(1 - min_dar), 1),
)
},
simplify = FALSE
) %>%
bind_rows(.id = "dar_q") %>%
pivot_longer(
cols = contains("- DAR"), names_to = "f", values_to = "y"
) %>%
mutate(dar_q = fct_inorder(dar_q)),
) +
geom_label(
aes(x = 1, y = 0, label = label),
data = . %>%
summarise(n = dplyr::n(), .by = dar_q) %>%
mutate(label = paste("n =", comma(n))),
hjust = 1, vjust = 0, alpha = 0.7
) +
facet_wrap(~dar_q, scales = "free_y") +
labs(title = x)
})
beta_fits_D <- sapply(names(res_D), simplify = FALSE, function(x){
res_D[[x]] %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
dplyr::filter(!is.na(dar)) %>%
# mutate(dar_q = cut(dar, seq(0, 1, length.out = 5))) %>%
mutate(
dar_q = cut(
dar,
unique(quantile(dar, probs = seq(0, 1, length.out = 26))),
include.lowest = TRUE
)
) %>%
split(.$dar_q) %>%
lapply(
\(x) {
tryCatch({ # !!
fit <- MASS::fitdistr(x$PValue, "beta", list(shape1 = 1, shape2 = 1))
tibble(
dar = median(x$dar),
shape1 = fit$estimate["shape1"],
se1 = fit$sd["shape1"],
shape2 = fit$estimate["shape2"],
se2 = fit$sd["shape2"]
)
}, error = \(e){
tibble(
dar = median(x$dar), shape1 = NA, se1 = NA, shape2 = NA, se2 = NA
)
})
}
) %>%
bind_rows() %>%
mutate(shape1_norm = shape1 / max(shape1, na.rm = TRUE))
})
sapply(beta_fits_D, simplify = FALSE, function(x){
as.data.frame(x)
})
## $apoe2v3_female
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.005895809 0.4408589 0.02192272 0.9516502 0.05768450 0.9264683
## 2 0.017029221 0.4443325 0.02205732 1.0054818 0.06143302 0.9337680
## 3 0.021807359 0.4102111 0.02021430 1.0301564 0.06439452 0.8620618
## 4 0.024431818 0.4240079 0.02093294 1.0311654 0.06392025 0.8910559
## 5 0.027083333 0.4166771 0.02057051 0.9868581 0.06092134 0.8756501
## 6 0.029788961 0.4243520 0.02096748 0.9233403 0.05588815 0.8917790
## 7 0.031982241 0.4588475 0.02283998 1.0678088 0.06561263 0.9642714
## 8 0.034334416 0.4167097 0.02043282 0.8990105 0.05397516 0.8757185
## 9 0.036147186 0.4185472 0.02071815 0.9779498 0.06032709 0.8795800
## 10 0.037813853 0.4612477 0.02303983 1.0836409 0.06691689 0.9693155
## 11 0.039393939 0.4534533 0.02268309 1.0239855 0.06289999 0.9529354
## 12 0.041071429 0.4758489 0.02369323 1.0545870 0.06394338 1.0000000
## 13 0.042576741 0.3908935 0.01920248 1.0435953 0.06623182 0.8214655
## 14 0.044372294 0.4121025 0.02014629 1.0236513 0.06330023 0.8660365
## 15 0.046428571 0.4536583 0.02261145 1.1162569 0.06955509 0.9533664
## 16 0.048674242 0.4548271 0.02269120 1.0748324 0.06646314 0.9558226
## 17 0.051055195 0.4109288 0.02031701 0.9245324 0.05657469 0.8635699
## 18 0.054215368 0.4557003 0.02266379 1.0484602 0.06425632 0.9576577
## 19 0.058028335 0.4390151 0.02175801 1.0313853 0.06349385 0.9225934
## 20 0.063950216 0.3919633 0.01916247 0.9207390 0.05649594 0.8237139
## 21 0.070596398 0.4321311 0.02151665 1.0258984 0.06368911 0.9081267
## 22 0.080438312 0.4336465 0.02155631 0.9389897 0.05701425 0.9113113
## 23 0.092478355 0.4368029 0.02153816 1.0792315 0.06684404 0.9179446
## 24 0.113849432 0.4141398 0.02048003 0.9733418 0.06012076 0.8703179
## 25 0.207954545 0.2453864 0.01161003 0.7515342 0.04971101 0.5156814
##
## $apoe2v3_male
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.01003542 0.7070445 0.03749538 0.8796560 0.04907089 0.8913257
## 2 0.01837121 0.6895401 0.03628600 0.9233918 0.05197575 0.8692591
## 3 0.02253788 0.7308151 0.03853632 0.9363788 0.05222089 0.9212919
## 4 0.02605519 0.6647743 0.03489683 0.8545684 0.04761281 0.8380384
## 5 0.02905844 0.6354534 0.03303564 0.9006440 0.05085790 0.8010754
## 6 0.03149351 0.6821956 0.03659039 0.8600777 0.04871496 0.8600003
## 7 0.03402056 0.7729462 0.04106069 0.9667523 0.05395412 0.9744038
## 8 0.03608323 0.7440700 0.03950806 0.9927646 0.05618936 0.9380015
## 9 0.03790584 0.7177556 0.03727992 0.9142557 0.05018615 0.9048286
## 10 0.03988095 0.6517843 0.03491851 0.8784730 0.05051026 0.8216627
## 11 0.04199134 0.7932504 0.04215423 0.9945657 0.05551381 1.0000000
## 12 0.04376623 0.6537008 0.03436996 0.9036548 0.05126982 0.8240788
## 13 0.04529221 0.7431380 0.03957037 0.9071147 0.05053620 0.9368266
## 14 0.04784091 0.7140229 0.03785418 0.8876071 0.04947811 0.9001229
## 15 0.04980700 0.6776041 0.03561545 0.9329660 0.05279801 0.8542121
## 16 0.05224116 0.6989092 0.03644656 0.9396902 0.05243465 0.8810701
## 17 0.05492424 0.6431253 0.03387721 0.9468421 0.05459116 0.8107469
## 18 0.05743661 0.7614044 0.04024522 0.9393213 0.05203211 0.9598538
## 19 0.06087662 0.7219788 0.03846509 0.8982060 0.05031037 0.9101525
## 20 0.06525974 0.7222349 0.03843846 0.9112787 0.05114400 0.9104754
## 21 0.07305195 0.6909656 0.03615089 1.0105339 0.05759775 0.8710562
## 22 0.08363456 0.6419801 0.03348786 0.9436218 0.05386195 0.8093032
## 23 0.10108225 0.6680077 0.03498514 0.9454049 0.05365310 0.8421146
## 24 0.12421537 0.6178671 0.03234848 0.8173550 0.04581641 0.7789055
## 25 0.22518939 0.2981008 0.01452833 0.6412545 0.03914084 0.3757966
##
## $apoe4v3_female
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.01715909 0.6935597 0.03666605 0.8571220 0.04761836 0.8802177
## 2 0.02289945 0.6383718 0.03347879 0.8862999 0.05025071 0.8101770
## 3 0.02678571 0.7220083 0.03813736 0.9630147 0.05427617 0.9163226
## 4 0.02954545 0.6777475 0.03533948 0.9124303 0.05097841 0.8601500
## 5 0.03227814 0.6656931 0.03526700 0.9046623 0.05149172 0.8448514
## 6 0.03422619 0.6440026 0.03384201 0.8453954 0.04741893 0.8173233
## 7 0.03613243 0.6135572 0.03195911 0.8538744 0.04818206 0.7786841
## 8 0.03802083 0.6375517 0.03319393 0.9332391 0.05313825 0.8091363
## 9 0.04024351 0.6698611 0.03523461 0.8829097 0.04955267 0.8501411
## 10 0.04219697 0.7272793 0.03862612 0.9375872 0.05274120 0.9230123
## 11 0.04431818 0.7381502 0.03928304 0.9138834 0.05104884 0.9368088
## 12 0.04638528 0.6829490 0.03606691 0.8561623 0.04768513 0.8667514
## 13 0.04853896 0.6821394 0.03595566 0.8964459 0.05035559 0.8657238
## 14 0.05096419 0.6740780 0.03535393 0.9178848 0.05172020 0.8554929
## 15 0.05413510 0.6980756 0.03678142 0.9389514 0.05294476 0.8859490
## 16 0.05833333 0.7563666 0.04044936 0.9247554 0.05173802 0.9599278
## 17 0.06185065 0.7879411 0.04198970 0.9803848 0.05480031 1.0000000
## 18 0.06780303 0.7341560 0.03852348 1.0136367 0.05712156 0.9317397
## 19 0.07391324 0.7249206 0.03844740 0.9877847 0.05613905 0.9200188
## 20 0.08255411 0.7514358 0.03986454 0.9724617 0.05463010 0.9536700
## 21 0.09359217 0.7434977 0.03873686 1.0146336 0.05661032 0.9435955
## 22 0.10587121 0.5928451 0.03132055 0.8146662 0.04654984 0.7523977
## 23 0.11870130 0.7032247 0.03726117 0.9020024 0.05061759 0.8924838
## 24 0.14347944 0.6549423 0.03411317 0.8927690 0.05002193 0.8312071
## 25 0.57206633 0.3111342 0.01534392 0.6262067 0.03789130 0.3948699
##
## $apoe4v3_male
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.01420455 0.3725740 0.018275029 0.8675891 0.05353546 0.9530350
## 2 0.02229437 0.3567168 0.017389213 0.8605068 0.05341421 0.9124728
## 3 0.02517587 0.3683323 0.018032883 0.8628615 0.05327017 0.9421850
## 4 0.02825758 0.3669960 0.017953374 0.8542788 0.05263468 0.9387666
## 5 0.03130780 0.3847493 0.018782922 0.8766585 0.05341932 0.9841791
## 6 0.03375000 0.3389318 0.016626008 0.8544133 0.05418042 0.8669791
## 7 0.03615260 0.3276340 0.015903688 0.8498837 0.05386473 0.8380797
## 8 0.03827922 0.3509945 0.017072133 0.8236148 0.05073704 0.8978352
## 9 0.04050325 0.3816951 0.018713943 0.9638332 0.06044496 0.9763666
## 10 0.04236472 0.3314705 0.015985673 0.8803795 0.05569667 0.8478934
## 11 0.04494048 0.3545042 0.017322167 0.8689680 0.05430426 0.9068130
## 12 0.04696970 0.3447502 0.016812664 0.8492054 0.05315652 0.8818624
## 13 0.04903950 0.3460643 0.016789326 0.8881060 0.05583386 0.8852238
## 14 0.05159091 0.3872454 0.019083628 0.9266671 0.05762254 0.9905641
## 15 0.05473731 0.3823639 0.018690182 1.0193857 0.06450392 0.9780773
## 16 0.05812771 0.3779848 0.018527386 0.8623896 0.05285629 0.9668758
## 17 0.06224026 0.3409698 0.016633905 0.8086566 0.05021197 0.8721923
## 18 0.06680646 0.3564419 0.017489721 0.8094519 0.04983988 0.9117695
## 19 0.07306818 0.3832031 0.018857892 0.8659887 0.05310794 0.9802241
## 20 0.08106061 0.3358194 0.016315572 0.8342687 0.05225068 0.8590176
## 21 0.09227420 0.3649044 0.017465796 0.9476836 0.05864959 0.9334164
## 22 0.10597944 0.3388895 0.016655153 0.9247661 0.05987964 0.8668709
## 23 0.12145563 0.3341856 0.016244916 0.8464944 0.05332085 0.8548384
## 24 0.15549242 0.3909342 0.019239594 0.9050027 0.05572511 1.0000000
## 25 0.62691739 0.1988180 0.009277477 0.6740541 0.04604119 0.5085714
plotly::ggplotly({
plotDarECDF(dar = dar_D$apoe2v3_female, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$apoe2v3_female) %>%
lm(shape1 ~ dar, data = beta_fits_D$apoe2v3_female) %>%
step() %>%
summary()
## Start: AIC=-172.9
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.021128 -172.9
## - dar 1 0.023862 0.044990 -156.0
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_D$apoe2v3_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.057750 -0.022453 -0.008388 0.024811 0.043194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.46606 0.01014 45.976 < 2e-16 ***
## dar -0.78347 0.15372 -5.097 3.67e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03031 on 23 degrees of freedom
## Multiple R-squared: 0.5304, Adjusted R-squared: 0.51
## F-statistic: 25.98 on 1 and 23 DF, p-value: 3.671e-05
lm_D_apoe2v3_female <- function(dar){
0.52339 - 1.07314 * dar
}
beta_fits_D$apoe2v3_female %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_D_apoe2v3_female(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_D_apoe2v3_female <- res_D$apoe2v3_female %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe2v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe2v3_female(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_D_apoe2v3_female$isDE)
##
## FALSE TRUE
## 1470 2
res_D$apoe2v3_female %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe2v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe2v3_female(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_D$apoe2v3_female) %>%
lm(shape1_norm ~ dar, data = beta_fits_D$apoe2v3_female) %>%
step() %>%
summary()
## Start: AIC=-135.77
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.093307 -135.77
## - dar 1 0.10538 0.198689 -118.87
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_D$apoe2v3_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.12136 -0.04718 -0.01763 0.05214 0.09077
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9794 0.0213 45.976 < 2e-16 ***
## dar -1.6465 0.3231 -5.097 3.67e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06369 on 23 degrees of freedom
## Multiple R-squared: 0.5304, Adjusted R-squared: 0.51
## F-statistic: 25.98 on 1 and 23 DF, p-value: 3.671e-05
lm_D_apoe2v3_female_norm <- function(dar){
1.10382 - 2.26321 * dar
}
dar_tmp <- dar_D$apoe2v3_female
lm_all <- lm_all %>%
rbind(tibble(
dataset = "D_apoe2v3_female",
intercept = 1.10382,
slope = -2.26321,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_D$apoe2v3_female %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_D_apoe2v3_female_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_D_apoe2v3_female_norm <- res_D$apoe2v3_female %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe2v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe2v3_female_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_D_apoe2v3_female_norm$isDE)
##
## FALSE TRUE
## 404 1068
res_D$apoe2v3_female %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe2v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe2v3_female_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_D$apoe2v3_male, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$apoe2v3_male) %>%
lm(shape1 ~ dar, data = beta_fits_D$apoe2v3_male) %>%
step() %>%
summary()
## Start: AIC=-142.35
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.071713 -142.35
## - dar 1 0.13078 0.202492 -118.40
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_D$apoe2v3_male)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.097239 -0.049590 0.006362 0.047731 0.086947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.77758 0.01854 41.933 < 2e-16 ***
## dar -1.69742 0.26209 -6.476 1.31e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05584 on 23 degrees of freedom
## Multiple R-squared: 0.6459, Adjusted R-squared: 0.6305
## F-statistic: 41.94 on 1 and 23 DF, p-value: 1.312e-06
lm_D_apoe2v3_male <- function(dar){
0.65566 - 1.46188 * dar
}
beta_fits_D$apoe2v3_male %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_D_apoe2v3_male(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_D_apoe2v3_male <- res_D$apoe2v3_male %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe2v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe2v3_male(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_D_apoe2v3_male$isDE)
##
## FALSE TRUE
## 128 5
res_D$apoe2v3_male %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe2v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe2v3_male(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_D$apoe2v3_male) %>%
lm(shape1_norm ~ dar, data = beta_fits_D$apoe2v3_male) %>%
step() %>%
summary()
## Start: AIC=-130.77
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.11397 -130.77
## - dar 1 0.20784 0.32180 -106.82
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_D$apoe2v3_male)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.122583 -0.062515 0.008021 0.060172 0.109608
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.98025 0.02338 41.933 < 2e-16 ***
## dar -2.13983 0.33040 -6.476 1.31e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07039 on 23 degrees of freedom
## Multiple R-squared: 0.6459, Adjusted R-squared: 0.6305
## F-statistic: 41.94 on 1 and 23 DF, p-value: 1.312e-06
lm_D_apoe2v3_male_norm <- function(dar){
1.12123 - 2.49993 * dar
}
dar_tmp <- dar_D$apoe2v3_male
lm_all <- lm_all %>%
rbind(tibble(
dataset = "D_apoe2v3_male",
intercept = 1.12123,
slope = -2.49993,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_D$apoe2v3_male %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_D_apoe2v3_male_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_D_apoe2v3_male_norm <- res_D$apoe2v3_male %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe2v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe2v3_male_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_D_apoe2v3_male_norm$isDE)
##
## FALSE TRUE
## 102 31
res_D$apoe2v3_male %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe2v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe2v3_male_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_D$apoe4v3_female, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$apoe4v3_female) %>%
lm(shape1 ~ dar, data = beta_fits_D$apoe4v3_female) %>%
step() %>%
summary()
## Start: AIC=-143.06
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.069701 -143.06
## - dar 1 0.12397 0.193670 -119.51
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_D$apoe4v3_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.09121 -0.03570 -0.01434 0.04539 0.10035
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.72890 0.01368 53.300 < 2e-16 ***
## dar -0.66787 0.10442 -6.396 1.58e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05505 on 23 degrees of freedom
## Multiple R-squared: 0.6401, Adjusted R-squared: 0.6245
## F-statistic: 40.91 on 1 and 23 DF, p-value: 1.585e-06
lm_D_apoe4v3_female <- function(dar){
0.64361 - 0.59121 * dar
}
beta_fits_D$apoe4v3_female %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_D_apoe4v3_female(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_D_apoe4v3_female <- res_D$apoe4v3_female %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe4v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe4v3_female(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_D_apoe4v3_female$isDE)
##
## FALSE TRUE
## 171 18
res_D$apoe4v3_female %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe4v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe4v3_female(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_D$apoe4v3_female) %>%
lm(shape1_norm ~ dar, data = beta_fits_D$apoe4v3_female) %>%
step() %>%
summary()
## Start: AIC=-131.14
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.11227 -131.14
## - dar 1 0.19967 0.31194 -107.59
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_D$apoe4v3_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.11576 -0.04531 -0.01820 0.05760 0.12736
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.92507 0.01736 53.300 < 2e-16 ***
## dar -0.84761 0.13252 -6.396 1.58e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06987 on 23 degrees of freedom
## Multiple R-squared: 0.6401, Adjusted R-squared: 0.6245
## F-statistic: 40.91 on 1 and 23 DF, p-value: 1.585e-06
lm_D_apoe4v3_female_norm <- function(dar){
1.06010 - 0.97379 * dar
}
dar_tmp <- dar_D$apoe4v3_female
lm_all <- lm_all %>%
rbind(tibble(
dataset = "D_apoe4v3_female",
intercept = 1.06010,
slope = -0.97379,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_D$apoe4v3_female %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_D_apoe4v3_female_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_D_apoe4v3_female_norm <- res_D$apoe4v3_female %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe4v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe4v3_female_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_D_apoe4v3_female_norm$isDE)
##
## FALSE TRUE
## 76 113
res_D$apoe4v3_female %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe4v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe4v3_female_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_D$apoe4v3_male, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$apoe4v3_male) %>%
lm(shape1 ~ dar, data = beta_fits_D$apoe4v3_male) %>%
step() %>%
summary()
## Start: AIC=-188.4
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.011368 -188.4
## - dar 1 0.022646 0.034013 -163.0
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_D$apoe4v3_male)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03644 -0.01467 -0.00710 0.01875 0.05777
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.373438 0.005401 69.147 < 2e-16 ***
## dar -0.258998 0.038262 -6.769 6.65e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02223 on 23 degrees of freedom
## Multiple R-squared: 0.6658, Adjusted R-squared: 0.6513
## F-statistic: 45.82 on 1 and 23 DF, p-value: 6.645e-07
lm_D_apoe4v3_male <- function(dar){
0.339208 - 0.229183 * dar
}
beta_fits_D$apoe4v3_male %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_D_apoe4v3_male(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_D_apoe4v3_male <- res_D$apoe4v3_male %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe4v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe4v3_male(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_D_apoe4v3_male$isDE)
##
## FALSE TRUE
## 2662 2
res_D$apoe4v3_male %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe4v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe4v3_male(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_D$apoe4v3_male) %>%
lm(shape1_norm ~ dar, data = beta_fits_D$apoe4v3_male) %>%
step() %>%
summary()
## Start: AIC=-141.44
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.07438 -141.44
## - dar 1 0.14818 0.22256 -116.04
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_D$apoe4v3_male)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.09321 -0.03753 -0.01816 0.04796 0.14777
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.95524 0.01381 69.147 < 2e-16 ***
## dar -0.66251 0.09787 -6.769 6.65e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05687 on 23 degrees of freedom
## Multiple R-squared: 0.6658, Adjusted R-squared: 0.6513
## F-statistic: 45.82 on 1 and 23 DF, p-value: 6.645e-07
lm_D_apoe4v3_male_norm <- function(dar){
1.05298 - 0.71144 * dar
}
dar_tmp <- dar_D$apoe4v3_male
lm_all <- lm_all %>%
rbind(tibble(
dataset = "D_apoe4v3_male",
intercept = 1.05298,
slope = -0.71144,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_D$apoe4v3_male %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_D_apoe4v3_male_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_D_apoe4v3_male_norm <- res_D$apoe4v3_male %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe4v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe4v3_male_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_D_apoe4v3_male_norm$isDE)
##
## FALSE TRUE
## 182 2482
res_D$apoe4v3_male %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_D$apoe4v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_D_apoe4v3_male_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
meta_E <- read_csv(here("data/metadata/pktu.csv")) %>%
mutate(
geno_psen1 = fct_relevel(geno_psen1, c("wt", "eofad", "fai")),
geno_chr14 = fct_relevel(geno_chr14, c("tu", "pk", "het")),
geno_joint = fct_relevel(geno_joint, unique(geno_joint))
)
dar_E <- read_rds(here("data/dar/dar_fixed_E.Rds")) %>%
sortSeqlevels() %>%
endoapply(sort)
gene_dar_E <- read_rds("data/dar/gene_dar_fixed_E.Rds") %>%
sortSeqlevels() %>%
endoapply(sort)
dge_E <- read_rds("data/de/dgeList_pktu.Rds")
dge_E <- DGEList(
counts = dge_E$counts,
samples = dge_E$samples,
genes = dge_E$genes
)
contrasts_E1 <- makeContrasts(
eofad_wt = eofad - wt,
fai_wt = fai - wt,
eofad_fai = eofad - fai,
levels = dge_E$samples$geno_psen1
)
contrasts_E2 <- makeContrasts(
pk_tu = pk - tu,
pk_het = pk - het,
tu_het = tu - het,
levels = dge_E$samples$geno_chr14
)
## Rerun the analysis using glmQL-Fits
res_E <- lapply(colnames(contrasts_E1), function(x){
design <- model.matrix(~0 + geno_psen1, data = dge_E$samples) %>%
set_colnames(str_remove(colnames(.), "geno_psen1"))
dge_E %>%
estimateDisp(design = design) %>%
glmQLFit() %>%
glmQLFTest(contrast = contrasts_E1[,x]) %>%
topTags(n = Inf) %>%
.[["table"]] %>%
makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
sortSeqlevels() %>%
sort()
}) %>%
set_names(colnames(contrasts_E1))
res_E2 <- lapply(colnames(contrasts_E2), function(x){
design <- model.matrix(~0 + geno_chr14, data = dge_E$samples) %>%
set_colnames(str_remove(colnames(.), "geno_chr14"))
dge_E %>%
estimateDisp(design = design) %>%
glmQLFit() %>%
glmQLFTest(contrast = contrasts_E2[,x]) %>%
topTags(n = Inf) %>%
.[["table"]] %>%
makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
sortSeqlevels() %>%
sort()
}) %>%
set_names(colnames(contrasts_E2))
res_E$pk_tu <- res_E2$pk_tu
res_E$pk_het <- res_E2$pk_het
res_E$tu_het <- res_E2$tu_het
lapply(names(res_E), function(x){
res_E[[x]] %>%
mcols() %>%
as_tibble() %>%
dplyr::select(
starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
ggplot(aes(dar, -log10(PValue))) +
geom_point() +
labs(title = x)
})
lapply(names(res_E), function(x){
res_E[[x]] %>%
mcols() %>%
as_tibble() %>%
dplyr::select(
starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar_q = cut(
dar,
breaks = quantile(dar, probs = seq(0, 1, length.out = 5), na.rm = TRUE),
# breaks = seq(0, 1, length.out = 7),
include.lowest = TRUE
) %>%
fct_na_value_to_level(levels(.)[[1]])
) %>%
ggplot(aes(PValue, after_stat(density))) +
geom_histogram(fill = "grey70", colour = "black", binwidth = 0.01) +
geom_line(
## Add 1 - dar^2 in red
aes(y = y, colour = f),
data = . %>%
pull(dar_q) %>%
levels() %>%
sapply(
\(x) {
min_dar = str_replace_all(x, "^[^0-9]([0-9\\.]+),.+", "\\1") %>% as.numeric()
tibble(
PValue = seq(0.01, 1, by = 0.01),
`1 - DAR` = dbeta(PValue, 1 - min_dar, 1),
`1 - DAR^2` = dbeta(PValue, 1 - min_dar^2, 1),
`sqrt(1 - DAR)` = dbeta(PValue, sqrt(1 - min_dar), 1),
)
},
simplify = FALSE
) %>%
bind_rows(.id = "dar_q") %>%
pivot_longer(
cols = contains("- DAR"), names_to = "f", values_to = "y"
) %>%
mutate(dar_q = fct_inorder(dar_q)),
) +
geom_label(
aes(x = 1, y = 0, label = label),
data = . %>%
summarise(n = dplyr::n(), .by = dar_q) %>%
mutate(label = paste("n =", comma(n))),
hjust = 1, vjust = 0, alpha = 0.7
) +
facet_wrap(~dar_q, scales = "free_y") +
labs(title = x)
})
beta_fits_E <- sapply(names(res_E), simplify = FALSE, function(x){
res_E[[x]] %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
dplyr::filter(!is.na(dar)) %>%
# mutate(dar_q = cut(dar, seq(0, 1, length.out = 5))) %>%
mutate(
dar_q = cut(
dar,
unique(quantile(dar, probs = seq(0, 1, length.out = 26))),
include.lowest = TRUE
)
) %>%
split(.$dar_q) %>%
lapply(
\(x) {
tryCatch({ # !!
fit <- MASS::fitdistr(x$PValue, "beta", list(shape1 = 1, shape2 = 1))
tibble(
dar = median(x$dar),
shape1 = fit$estimate["shape1"],
se1 = fit$sd["shape1"],
shape2 = fit$estimate["shape2"],
se2 = fit$sd["shape2"]
)
}, error = \(e){
tibble(
dar = median(x$dar), shape1 = NA, se1 = NA, shape2 = NA, se2 = NA
)
})
}
) %>%
bind_rows() %>%
mutate(shape1_norm = shape1 / max(shape1, na.rm = TRUE))
})
sapply(beta_fits_E, simplify = FALSE, function(x){
as.data.frame(x)
})
## $eofad_wt
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 1.0047821 0.02462011 0.9957088 0.02435312 0.7373614
## 2 0.01625964 1.1518700 0.05706128 1.0153419 0.04909292 0.8453021
## 3 0.02777778 1.2490637 0.06270209 1.0721270 0.05233116 0.9166279
## 4 0.03612900 1.1984385 0.05988093 1.0181983 0.04932720 0.8794765
## 5 0.04225415 1.2455501 0.06228706 1.0628240 0.05161705 0.9140494
## 6 0.04861111 1.2523257 0.06281742 1.0242350 0.04948565 0.9190217
## 7 0.05555556 1.2267509 0.05277443 1.1087697 0.04682630 0.9002536
## 8 0.05698724 1.3626725 0.08387104 1.1790374 0.07078243 1.0000000
## 9 0.06349206 1.2085487 0.05972895 1.1004127 0.05345464 0.8868959
## 10 0.07042187 1.2510050 0.06160070 1.1101522 0.05349386 0.9180525
## 11 0.07733962 1.1852538 0.05961707 1.1548787 0.05781519 0.8698009
## 12 0.08333333 1.2687521 0.06306519 1.1919307 0.05859943 0.9310763
## 13 0.09259259 1.2203609 0.06078018 1.0763801 0.05237954 0.8955643
## 14 0.10040293 1.1285802 0.05591439 1.0357944 0.05047820 0.8282109
## 15 0.11111111 1.1793361 0.05567982 1.1167343 0.05219602 0.8654582
## 16 0.11728395 1.1332346 0.05900378 1.0782800 0.05561652 0.8316265
## 17 0.12829936 1.0456292 0.05134791 1.0330387 0.05060860 0.7673371
## 18 0.14177395 0.9541102 0.04642553 1.0078933 0.04959575 0.7001757
## 19 0.15912004 1.0564019 0.05071817 1.0229518 0.04880167 0.7752427
## 20 0.17876292 0.9518843 0.04727065 1.0184969 0.05128154 0.6985422
## 21 0.21382120 0.8762720 0.04229303 1.0343877 0.05168071 0.6430540
## 22 0.28132184 0.6687867 0.03133762 0.9083512 0.04571536 0.4907905
##
## $fai_wt
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.000000000 1.0469079 0.02971339 1.018230 0.02874095 0.7580235
## 2 0.007936508 1.2741840 0.06358669 1.126344 0.05498011 0.9225850
## 3 0.018518519 1.2510786 0.06262316 1.057093 0.05129386 0.9058553
## 4 0.027777778 1.2116317 0.06044033 1.045319 0.05072271 0.8772934
## 5 0.035000200 1.2098452 0.05989886 1.096743 0.05332656 0.8759998
## 6 0.040380915 1.2964460 0.06495358 1.170199 0.05758249 0.9387040
## 7 0.044894406 1.2156032 0.06070682 1.036019 0.05021018 0.8801690
## 8 0.050000000 1.2723038 0.06352781 1.116020 0.05442578 0.9212236
## 9 0.055555556 1.1408010 0.04708284 1.057949 0.04304185 0.8260077
## 10 0.056172837 1.3811021 0.09259679 1.174494 0.07658527 1.0000000
## 11 0.061111111 1.3202811 0.06621206 1.112198 0.05410094 0.9559620
## 12 0.066666667 1.2582029 0.06264680 1.128821 0.05511841 0.9110137
## 13 0.072649573 1.2437282 0.06220366 1.059093 0.05142014 0.9005331
## 14 0.077777778 1.2937477 0.06479675 1.108874 0.05402300 0.9367503
## 15 0.083357570 1.2694053 0.06295593 1.207309 0.05935297 0.9191249
## 16 0.090908634 1.2139036 0.06023975 1.129279 0.05530777 0.8789384
## 17 0.098189487 1.2682216 0.06340682 1.092757 0.05317752 0.9182678
## 18 0.107077273 1.1505282 0.05492862 1.030953 0.04819736 0.8330508
## 19 0.112906491 1.1743535 0.06068854 1.111872 0.05688093 0.8503017
## 20 0.125062524 1.2530417 0.06234553 1.143131 0.05594826 0.9072767
## 21 0.139833711 1.0786157 0.05286772 1.112166 0.05483119 0.7809819
## 22 0.163250699 1.0311892 0.05039802 1.076194 0.05303857 0.7466423
## 23 0.208225108 0.8960612 0.04304755 1.071372 0.05338824 0.6488015
##
## $eofad_fai
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 0.8084209 0.01927844 0.9992228 0.02494300 0.8271861
## 2 0.01851852 0.8726393 0.04187817 1.0282355 0.05106719 0.8928951
## 3 0.02777778 0.8845877 0.04237726 1.1153304 0.05600822 0.9051208
## 4 0.03688241 0.8476469 0.04063253 0.9754874 0.04818852 0.8673226
## 5 0.04285196 0.9773145 0.04718509 1.2290989 0.06199313 1.0000000
## 6 0.04879444 0.8382336 0.04003188 1.0481911 0.05247057 0.8576907
## 7 0.05555556 0.9330994 0.03628468 1.0782735 0.04317148 0.9547585
## 8 0.05603233 0.8437460 0.05947626 1.0787835 0.08003883 0.8633311
## 9 0.06126237 0.8763696 0.04184610 1.1417479 0.05751645 0.8967119
## 10 0.06746032 0.9373684 0.04511823 1.1285163 0.05635903 0.9591267
## 11 0.07407407 0.9034084 0.04333824 1.1353353 0.05702083 0.9243784
## 12 0.08055556 0.8356628 0.03865868 1.0519301 0.05107898 0.8550603
## 13 0.08731676 0.8559504 0.04229778 1.0751316 0.05570392 0.8758188
## 14 0.09523810 0.8279808 0.03928408 1.0618428 0.05308869 0.8472000
## 15 0.10442850 0.8959026 0.04299995 1.1597716 0.05861420 0.9166983
## 16 0.11111111 0.8507968 0.04052905 1.1205708 0.05648864 0.8705456
## 17 0.12158549 0.8113813 0.03870612 1.0052065 0.05021386 0.8302152
## 18 0.13587208 0.8670064 0.04114675 1.2108588 0.06142758 0.8871314
## 19 0.15277778 0.7574677 0.03581925 1.0300559 0.05208794 0.7750501
## 20 0.17375067 0.7899069 0.03733063 1.1031795 0.05594983 0.8082422
## 21 0.21392121 0.6854525 0.03189870 1.0713914 0.05506833 0.7013632
## 22 0.28488870 0.5294518 0.02404602 0.9361927 0.04893386 0.5417415
##
## $pk_tu
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 1.0773128 0.03086767 0.9484855 0.02649032 0.6546973
## 2 0.01111111 1.4150964 0.07252230 0.9527728 0.04540179 0.8599728
## 3 0.02464971 1.4280533 0.07255585 1.0553602 0.05083222 0.8678469
## 4 0.03395062 1.4353703 0.07278655 0.9908302 0.04699352 0.8722935
## 5 0.04073720 1.4306315 0.07385398 0.9763828 0.04702504 0.8694137
## 6 0.04629630 1.5231175 0.07805774 1.0362229 0.04966332 0.9256186
## 7 0.05235744 1.6455130 0.08378062 1.1106194 0.05299824 1.0000000
## 8 0.05555556 1.3720253 0.06806620 1.0187923 0.04788702 0.8337979
## 9 0.05934014 1.4385896 0.07557485 1.0458324 0.05193154 0.8742499
## 10 0.06481481 1.5494599 0.07909549 1.0959460 0.05276063 0.9416273
## 11 0.07017508 1.4391622 0.07332345 1.0410079 0.05008302 0.8745979
## 12 0.07540643 1.4253446 0.07235598 1.0632021 0.05125567 0.8662008
## 13 0.08103175 1.4432688 0.07190194 1.1001298 0.05228996 0.8770936
## 14 0.08671745 1.5882392 0.08238948 1.1626220 0.05730440 0.9651940
## 15 0.09338779 1.4411719 0.07250464 1.1321597 0.05465050 0.8758192
## 16 0.10136311 1.3331719 0.06776312 1.0110395 0.04884608 0.8101862
## 17 0.11111111 1.3357492 0.06700176 1.1272979 0.05487961 0.8117524
## 18 0.11952862 1.2850686 0.06479350 1.0070803 0.04852625 0.7809532
## 19 0.13333333 1.2089924 0.06051498 1.0095087 0.04883168 0.7347207
## 20 0.15091308 1.2119697 0.06041727 1.0631501 0.05172224 0.7365300
## 21 0.16861894 1.1181113 0.05535831 1.0304128 0.05021790 0.6794910
## 22 0.20753296 0.9203194 0.04472274 0.9682561 0.04755503 0.5592903
## 23 0.30032918 0.5632364 0.02596805 0.8364516 0.04254534 0.3422862
##
## $pk_het
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 0.8912718 0.02161854 0.9348192 0.02290699 0.6410762
## 2 0.01614621 1.1248307 0.05629911 0.9253306 0.04454259 0.8090710
## 3 0.02777778 1.2304786 0.06190328 0.9688183 0.04656147 0.8850617
## 4 0.03640559 1.3139010 0.06560835 1.1262072 0.05471954 0.9450659
## 5 0.04320988 1.2029304 0.06063344 0.9574647 0.04617708 0.8652468
## 6 0.04976852 1.3902744 0.06986950 1.1735817 0.05729273 1.0000000
## 7 0.05555556 1.0996914 0.05167694 0.9401902 0.04281786 0.7909887
## 8 0.05836165 1.2404415 0.06638303 1.0316688 0.05334417 0.8922278
## 9 0.06609206 1.1638645 0.05799061 1.0106957 0.04901372 0.8371473
## 10 0.07407407 1.1412827 0.05664684 1.0294432 0.05009448 0.8209046
## 11 0.08333333 1.1323193 0.05620705 0.9920483 0.04799182 0.8144574
## 12 0.09090909 1.0355725 0.05105646 0.9947404 0.04864996 0.7448691
## 13 0.10000000 1.0428104 0.05104118 1.0446927 0.05115145 0.7500752
## 14 0.11111111 1.0310880 0.05090059 0.9497071 0.04610555 0.7416435
## 15 0.11799343 1.0661521 0.05283077 0.9688915 0.04710118 0.7668645
## 16 0.13080050 1.0461851 0.05141327 1.0242166 0.05012294 0.7525026
## 17 0.14692665 0.9265272 0.04518503 0.8993777 0.04358334 0.6664347
## 18 0.16626068 0.9413272 0.04598480 0.9641922 0.04733770 0.6770801
## 19 0.18612056 0.9401167 0.04570278 0.9962492 0.04901428 0.6762094
## 20 0.21851852 0.7778995 0.03707980 0.9627871 0.04810116 0.5595294
## 21 0.26147247 0.7082393 0.03342675 0.9186427 0.04601960 0.5094241
## 22 0.33333333 0.6076665 0.02815911 0.8938747 0.04544650 0.4370839
##
## $tu_het
## dar shape1 se1 shape2 se2 shape1_norm
## 1 0.00000000 1.1413357 0.02850221 0.9505076 0.02289607 0.6816328
## 2 0.01525054 1.5593133 0.08030164 1.0184672 0.04870769 0.9312589
## 3 0.02620299 1.5689669 0.08090025 1.0195926 0.04878890 0.9370243
## 4 0.03388598 1.4185959 0.06894206 0.9738205 0.04418918 0.8472192
## 5 0.04006574 1.6744143 0.09148042 1.0702008 0.05427509 1.0000000
## 6 0.04533572 1.5146482 0.07780080 0.9980350 0.04761966 0.9045839
## 7 0.05148698 1.4501633 0.07386391 0.9716071 0.04602504 0.8660720
## 8 0.05555556 1.3458248 0.06429094 0.9925739 0.04483005 0.8037585
## 9 0.05826801 1.3866646 0.07658300 0.9526025 0.04907719 0.8281490
## 10 0.06491879 1.4460038 0.07409663 0.9776776 0.04667364 0.8635878
## 11 0.07205237 1.4437958 0.07387654 0.9905651 0.04735976 0.8622691
## 12 0.07934291 1.3425027 0.06658609 0.9944807 0.04667221 0.8017745
## 13 0.08607712 1.4704006 0.07695645 1.0233080 0.05023085 0.8781582
## 14 0.09409722 1.3043927 0.06577441 1.0173529 0.04900182 0.7790143
## 15 0.10213350 1.3957085 0.07094233 1.0213835 0.04907687 0.8335503
## 16 0.11111111 1.3070458 0.06608986 0.9677725 0.04625162 0.7805988
## 17 0.11754121 1.3060627 0.06654687 0.9466928 0.04538753 0.7800117
## 18 0.12962963 1.1846363 0.05928188 0.9850551 0.04757482 0.7074930
## 19 0.14444444 1.1155891 0.05540726 0.9813051 0.04752546 0.6662563
## 20 0.16666667 1.1546807 0.05769642 0.9756571 0.04717827 0.6896027
## 21 0.19739514 0.9138406 0.04380057 0.9413325 0.04540206 0.5457673
## 22 0.26475376 0.8932055 0.04369817 1.0272942 0.05173889 0.5334436
plotly::ggplotly({
plotDarECDF(dar = dar_E$eofad_wt, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$eofad_wt) %>%
lm(shape1 ~ dar, data = beta_fits_E$eofad_wt) %>%
step() %>%
summary()
## Start: AIC=-99.63
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.19798 -99.634
## - dar 1 0.3348 0.53278 -79.855
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$eofad_wt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30073 -0.02983 0.01965 0.04595 0.16290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.30552 0.03715 35.144 < 2e-16 ***
## dar -1.85562 0.31907 -5.816 1.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09949 on 20 degrees of freedom
## Multiple R-squared: 0.6284, Adjusted R-squared: 0.6098
## F-statistic: 33.82 on 1 and 20 DF, p-value: 1.088e-05
lm_E_eofad_wt <- function(dar){
1.30552 - 1.85562 * dar
}
beta_fits_E$eofad_wt %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_eofad_wt(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_eofad_wt <- res_E$eofad_wt %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$eofad_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_eofad_wt(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_eofad_wt$isDE)
##
## FALSE TRUE
## 1 4
res_E$eofad_wt %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$eofad_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_eofad_wt(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$eofad_wt) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$eofad_wt) %>%
step() %>%
summary()
## Start: AIC=-113.25
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.10662 -113.250
## - dar 1 0.1803 0.28692 -93.471
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$eofad_wt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.22069 -0.02189 0.01442 0.03372 0.11955
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.95805 0.02726 35.144 < 2e-16 ***
## dar -1.36175 0.23415 -5.816 1.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07301 on 20 degrees of freedom
## Multiple R-squared: 0.6284, Adjusted R-squared: 0.6098
## F-statistic: 33.82 on 1 and 20 DF, p-value: 1.088e-05
lm_E_eofad_wt_norm <- function(dar){
1.10422 - 1.56951 * dar
}
dar_tmp <- dar_E$eofad_wt
lm_all <- lm_all %>%
rbind(tibble(
dataset = "E_eofad_wt",
intercept = 1.10422,
slope = -1.56951,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_E$eofad_wt %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_eofad_wt_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_eofad_wt_norm <- res_E$eofad_wt %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$eofad_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_eofad_wt_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_eofad_wt_norm$isDE)
##
## FALSE TRUE
## 4 1
res_E$eofad_wt %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$eofad_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_eofad_wt_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_E$fai_wt, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$fai_wt) %>%
lm(shape1 ~ dar, data = beta_fits_E$fai_wt) %>%
step() %>%
summary()
## Start: AIC=-107.63
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.17946 -107.63
## - dar 1 0.082155 0.26161 -100.96
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$fai_wt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25111 -0.04832 0.01256 0.05955 0.15086
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.29802 0.03523 36.841 < 2e-16 ***
## dar -1.20650 0.38912 -3.101 0.00542 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09244 on 21 degrees of freedom
## Multiple R-squared: 0.314, Adjusted R-squared: 0.2814
## F-statistic: 9.614 on 1 and 21 DF, p-value: 0.005415
lm_E_fai_wt <- function(dar){
1.29802 - 1.20650 * dar
}
beta_fits_E$fai_wt %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_fai_wt(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_fai_wt <- res_E$fai_wt %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$fai_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_fai_wt(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_fai_wt$isDE)
##
## TRUE
## 2
res_E$fai_wt %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$fai_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_fai_wt(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$fai_wt) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$fai_wt) %>%
step() %>%
summary()
## Start: AIC=-122.48
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.094083 -122.48
## - dar 1 0.043071 0.137154 -115.81
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$fai_wt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.181819 -0.034986 0.009092 0.043120 0.109229
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.93984 0.02551 36.841 < 2e-16 ***
## dar -0.87358 0.28174 -3.101 0.00542 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06693 on 21 degrees of freedom
## Multiple R-squared: 0.314, Adjusted R-squared: 0.2814
## F-statistic: 9.614 on 1 and 21 DF, p-value: 0.005415
lm_E_fai_wt_norm <- function(dar){
1.04365 - 0.97007 * dar
}
dar_tmp <- dar_E$fai_wt
lm_all <- lm_all %>%
rbind(tibble(
dataset = "E_fai_wt",
intercept = 1.04365,
slope = -0.97007,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_E$fai_wt %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_fai_wt_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_fai_wt_norm <- res_E$fai_wt %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$fai_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_fai_wt_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_fai_wt_norm$isDE)
##
## TRUE
## 2
res_E$fai_wt %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$fai_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_fai_wt_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_E$eofad_fai, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$eofad_fai) %>%
lm(shape1 ~ dar, data = beta_fits_E$eofad_fai) %>%
step() %>%
summary()
## Start: AIC=-123.25
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.067664 -123.25
## - dar 1 0.11355 0.181212 -103.58
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$eofad_fai)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.13083 -0.03195 -0.00184 0.04361 0.08474
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.93925 0.02147 43.748 < 2e-16 ***
## dar -1.08928 0.18802 -5.793 1.14e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05817 on 20 degrees of freedom
## Multiple R-squared: 0.6266, Adjusted R-squared: 0.6079
## F-statistic: 33.56 on 1 and 20 DF, p-value: 1.143e-05
lm_E_eofad_fai <- function(dar){
0.93925 - 1.08928 * dar
}
beta_fits_E$eofad_fai %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_eofad_fai(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_eofad_fai <- res_E$eofad_fai %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$eofad_fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_eofad_fai(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_eofad_fai$isDE)
##
## FALSE TRUE
## 9 5
res_E$eofad_fai %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$eofad_fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_eofad_fai(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$eofad_fai) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$eofad_fai) %>%
step() %>%
summary()
## Start: AIC=-122.24
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.070842 -122.24
## - dar 1 0.11888 0.189722 -102.57
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$eofad_fai)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.133868 -0.032696 -0.001883 0.044625 0.086708
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.96105 0.02197 43.748 < 2e-16 ***
## dar -1.11457 0.19239 -5.793 1.14e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05952 on 20 degrees of freedom
## Multiple R-squared: 0.6266, Adjusted R-squared: 0.6079
## F-statistic: 33.56 on 1 and 20 DF, p-value: 1.143e-05
lm_E_eofad_fai_norm <- function(dar){
1.10601 - 1.28268 * dar
}
dar_tmp <- dar_E$eofad_fai
lm_all <- lm_all %>%
rbind(tibble(
dataset = "E_eofad_fai",
intercept = 1.10601,
slope = -1.28268,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_E$eofad_fai %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_eofad_fai_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_eofad_fai_norm <- res_E$eofad_fai %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$eofad_fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_eofad_fai_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_eofad_fai_norm$isDE)
##
## FALSE TRUE
## 8 6
res_E$eofad_fai %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$eofad_fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_eofad_fai_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_E$pk_tu, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$pk_tu) %>%
lm(shape1 ~ dar, data = beta_fits_E$pk_tu) %>%
step() %>%
summary()
## Start: AIC=-84.24
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.49598 -84.244
## - dar 1 0.74863 1.24461 -65.083
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$pk_tu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.50010 -0.05260 0.03008 0.06502 0.24551
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.57741 0.05415 29.13 < 2e-16 ***
## dar -2.70633 0.48070 -5.63 1.38e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1537 on 21 degrees of freedom
## Multiple R-squared: 0.6015, Adjusted R-squared: 0.5825
## F-statistic: 31.7 on 1 and 21 DF, p-value: 1.377e-05
lm_E_pk_tu <- function(dar){
1.57741 - 2.70633 * dar
}
beta_fits_E$pk_tu %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_pk_tu(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_pk_tu <- res_E$pk_tu %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$pk_tu) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_pk_tu(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_pk_tu$isDE)
##
## FALSE TRUE
## 5 2
res_E$pk_tu %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$pk_tu) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_pk_tu(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$pk_tu) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$pk_tu) %>%
step() %>%
summary()
## Start: AIC=-107.15
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.18317 -107.155
## - dar 1 0.27648 0.45966 -87.994
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$pk_tu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30392 -0.03197 0.01828 0.03951 0.14920
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9586 0.0329 29.13 < 2e-16 ***
## dar -1.6447 0.2921 -5.63 1.38e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09339 on 21 degrees of freedom
## Multiple R-squared: 0.6015, Adjusted R-squared: 0.5825
## F-statistic: 31.7 on 1 and 21 DF, p-value: 1.377e-05
lm_E_pk_tu_norm <- function(dar){
1.10669 - 1.89872 * dar
}
dar_tmp <- dar_E$pk_tu
lm_all <- lm_all %>%
rbind(tibble(
dataset = "E_pk_tu",
intercept = 1.10669,
slope = -1.89872,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_E$pk_tu %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_pk_tu_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_pk_tu_norm <- res_E$pk_tu %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$pk_tu) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_pk_tu_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_pk_tu_norm$isDE)
##
## FALSE TRUE
## 6 1
res_E$pk_tu %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$pk_tu) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_pk_tu_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_E$pk_het, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$pk_het) %>%
lm(shape1 ~ dar, data = beta_fits_E$pk_het) %>%
step() %>%
summary()
## Start: AIC=-95.03
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.24408 -95.029
## - dar 1 0.52135 0.76542 -71.884
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$pk_het)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36024 -0.04638 0.01646 0.03780 0.23264
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.25151 0.03905 32.051 < 2e-16 ***
## dar -1.88631 0.28860 -6.536 2.27e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1105 on 20 degrees of freedom
## Multiple R-squared: 0.6811, Adjusted R-squared: 0.6652
## F-statistic: 42.72 on 1 and 20 DF, p-value: 2.274e-06
lm_E_pk_het <- function(dar){
1.25151 - 1.88631 * dar
}
beta_fits_E$pk_het %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_pk_het(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_pk_het <- res_E$pk_het %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$pk_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_pk_het(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_pk_het$isDE)
##
## FALSE TRUE
## 2 2
res_E$pk_het %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$pk_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_pk_het(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$pk_het) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$pk_het) %>%
step() %>%
summary()
## Start: AIC=-109.53
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.12628 -109.527
## - dar 1 0.26973 0.39601 -86.382
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$pk_het)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25911 -0.03336 0.01184 0.02719 0.16733
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.90019 0.02809 32.051 < 2e-16 ***
## dar -1.35679 0.20758 -6.536 2.27e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07946 on 20 degrees of freedom
## Multiple R-squared: 0.6811, Adjusted R-squared: 0.6652
## F-statistic: 42.72 on 1 and 20 DF, p-value: 2.274e-06
lm_E_pk_het_norm <- function(dar){
1.18496 - 1.78599 * dar
}
dar_tmp <- dar_E$pk_het
lm_all <- lm_all %>%
rbind(tibble(
dataset = "E_pk_het",
intercept = 1.18496,
slope = -1.78599,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_E$pk_het %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_pk_het_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_pk_het_norm <- res_E$pk_het %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$pk_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_pk_het_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_pk_het_norm$isDE)
##
## FALSE TRUE
## 2 2
res_E$pk_het %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$pk_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_pk_het_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
plotly::ggplotly({
plotDarECDF(dar = dar_E$tu_het, dar_val = "origin")
})
# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$tu_het) %>%
lm(shape1 ~ dar, data = beta_fits_E$tu_het) %>%
step() %>%
summary()
## Start: AIC=-89.84
## shape1 ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.30901 -89.839
## - dar 1 0.52863 0.83764 -69.901
##
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$tu_het)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41529 -0.04101 0.01946 0.06402 0.21831
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.55663 0.04644 33.517 < 2e-16 ***
## dar -2.50894 0.42893 -5.849 1.01e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1243 on 20 degrees of freedom
## Multiple R-squared: 0.6311, Adjusted R-squared: 0.6126
## F-statistic: 34.21 on 1 and 20 DF, p-value: 1.01e-05
lm_E_tu_het <- function(dar){
1.55663 - 2.50894 * dar
}
beta_fits_E$tu_het %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_tu_het(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1)) +
geom_point(
data = . %>% distinct(dar, shape1)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_tu_het <- res_E$tu_het %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$tu_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_tu_het(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_tu_het$isDE)
## < table of extent 0 >
res_E$tu_het %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$tu_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_tu_het(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$tu_het) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$tu_het) %>%
step() %>%
summary()
## Start: AIC=-112.52
## shape1_norm ~ dar
##
## Df Sum of Sq RSS AIC
## <none> 0.11022 -112.520
## - dar 1 0.18855 0.29877 -92.581
##
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$tu_het)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24802 -0.02449 0.01162 0.03823 0.13038
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.92966 0.02774 33.517 < 2e-16 ***
## dar -1.49840 0.25617 -5.849 1.01e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07424 on 20 degrees of freedom
## Multiple R-squared: 0.6311, Adjusted R-squared: 0.6126
## F-statistic: 34.21 on 1 and 20 DF, p-value: 1.01e-05
lm_E_tu_het_norm <- function(dar){
1.13935 - 1.83637 * dar
}
dar_tmp <- dar_E$tu_het
lm_all <- lm_all %>%
rbind(tibble(
dataset = "E_tu_het",
intercept = 1.13935,
slope = -1.83637,
dar_mean = mean(dar_tmp$dar_origin),
dar_median = median(dar_tmp$dar_origin),
dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
))
beta_fits_E$tu_het %>%
mutate(
`1 - DAR` = 1 - dar,
`sqrt(1 - DAR)` = sqrt(1 - dar),
`(1.1 - DAR)^2` = (1.1 - dar)^2,
`DAR lm` = lm_E_tu_het_norm(dar)
) %>%
pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
ggplot(aes(dar, shape1_norm)) +
geom_point(
data = . %>% distinct(dar, shape1_norm)
) +
geom_line(aes(y = estimate, colour = f))
darP_E_tu_het_norm <- res_E$tu_het %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$tu_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_tu_het_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1),
FDR_darP = p.adjust(darP, "fdr"),
wasDE = FDR < 0.05,
isDE = FDR_darP < 0.05
) %>%
dplyr::filter(wasDE | isDE) %>%
dplyr::arrange(!isDE, darP) %>%
as.data.frame()
table(darP_E_tu_het_norm$isDE)
## < table of extent 0 >
res_E$tu_het %>%
as_tibble() %>%
dplyr::select(
range, starts_with("gene"), starts_with("log"), PValue, FDR
) %>%
left_join(
mcols(gene_dar_E$tu_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
) %>%
mutate(
dar = ifelse(is.na(dar), 0, dar),
alpha = lm_E_tu_het_norm(dar),
alpha = ifelse(alpha > 1, 1, alpha),
alpha = ifelse(alpha < 0.1, 0.1, alpha),
darP = pbeta(PValue, alpha, 1)
) %>%
pivot_longer(
cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
) %>%
ggplot(aes(sample = -log(p), colour = type)) +
stat_qq(distribution = qexp) +
stat_qq_line(distribution = qexp) +
facet_wrap(~type) +
scale_colour_brewer(palette= "Set1")
lm_all %>%
lb_reactable(
pagination = FALSE,
columns = list(
dar_mean = colDef(cell = react_format("f")),
dar_median = colDef(cell = react_format("f")),
dar_mean_0 = colDef(cell = react_format("f")),
dar_median_0 = colDef(cell = react_format("f"))
)
)
lm_all %>%
ggplot() +
geom_abline(aes(intercept = intercept, slope = slope)) +
scale_x_continuous(limits = c(0, 1)) +
scale_y_continuous(limits = c(0, 1.3)) +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_colour_viridis()
scaling the alpha: - problem we have is that we have a mix of distributions, the null and where there is a true difference. - if we don’t scale, we’ll be overcorrecting the p values to the point where we will lose the true signal - use the median or max alpha value